path: root/gcc-4.4.0/gcc/config/i386
author     Jing Yu <jingyu@google.com>  2009-11-05 15:11:04 -0800
committer  Jing Yu <jingyu@google.com>  2009-11-05 15:11:04 -0800
commit     df62c1c110e8532b995b23540b7e3695729c0779 (patch)
tree       dbbd4cbdb50ac38011e058a2533ee4c3168b0205 /gcc-4.4.0/gcc/config/i386
parent     8d401cf711539af5a2f78d12447341d774892618 (diff)
Check in gcc sources for prebuilt toolchains in Eclair.
Diffstat (limited to 'gcc-4.4.0/gcc/config/i386')
-rw-r--r--  gcc-4.4.0/gcc/config/i386/ammintrin.h | 88
-rw-r--r--  gcc-4.4.0/gcc/config/i386/athlon.md | 1168
-rw-r--r--  gcc-4.4.0/gcc/config/i386/att.h | 91
-rw-r--r--  gcc-4.4.0/gcc/config/i386/avxintrin.h | 1426
-rw-r--r--  gcc-4.4.0/gcc/config/i386/biarch64.h | 29
-rw-r--r--  gcc-4.4.0/gcc/config/i386/bmmintrin.h | 1261
-rw-r--r--  gcc-4.4.0/gcc/config/i386/bsd.h | 99
-rw-r--r--  gcc-4.4.0/gcc/config/i386/chkstk.asm | 148
-rw-r--r--  gcc-4.4.0/gcc/config/i386/constraints.md | 168
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cpuid.h | 177
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cross-stdarg.h | 73
-rw-r--r--  gcc-4.4.0/gcc/config/i386/crtdll.h | 42
-rw-r--r--  gcc-4.4.0/gcc/config/i386/crtfastmath.c | 89
-rw-r--r--  gcc-4.4.0/gcc/config/i386/crtprec.c | 47
-rwxr-xr-x  gcc-4.4.0/gcc/config/i386/cygming-crtbegin.c | 135
-rwxr-xr-x  gcc-4.4.0/gcc/config/i386/cygming-crtend.c | 88
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cygming.h | 404
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cygming.opt | 47
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cygwin.asm | 126
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cygwin.h | 269
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cygwin1.c | 53
-rw-r--r--  gcc-4.4.0/gcc/config/i386/cygwin2.c | 66
-rw-r--r--  gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.4.ver | 81
-rw-r--r--  gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.5.ver | 85
-rw-r--r--  gcc-4.4.0/gcc/config/i386/darwin.h | 302
-rw-r--r--  gcc-4.4.0/gcc/config/i386/darwin64.h | 35
-rw-r--r--  gcc-4.4.0/gcc/config/i386/djgpp.h | 187
-rw-r--r--  gcc-4.4.0/gcc/config/i386/djgpp.opt | 25
-rw-r--r--  gcc-4.4.0/gcc/config/i386/driver-i386.c | 613
-rw-r--r--  gcc-4.4.0/gcc/config/i386/emmintrin.h | 1513
-rw-r--r--  gcc-4.4.0/gcc/config/i386/freebsd.h | 140
-rw-r--r--  gcc-4.4.0/gcc/config/i386/freebsd64.h | 43
-rw-r--r--  gcc-4.4.0/gcc/config/i386/gas.h | 127
-rw-r--r--  gcc-4.4.0/gcc/config/i386/geode.md | 152
-rw-r--r--  gcc-4.4.0/gcc/config/i386/gmm_malloc.h | 74
-rw-r--r--  gcc-4.4.0/gcc/config/i386/gmon-sol2.c | 459
-rw-r--r--  gcc-4.4.0/gcc/config/i386/gnu.h | 56
-rw-r--r--  gcc-4.4.0/gcc/config/i386/gstabs.h | 7
-rw-r--r--  gcc-4.4.0/gcc/config/i386/gthr-win32.c | 260
-rwxr-xr-x  gcc-4.4.0/gcc/config/i386/host-cygwin.c | 80
-rw-r--r--  gcc-4.4.0/gcc/config/i386/host-i386-darwin.c | 30
-rw-r--r--  gcc-4.4.0/gcc/config/i386/host-mingw32.c | 175
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-aout.h | 25
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-c.c | 351
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-coff.h | 69
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-interix.h | 362
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-interix3.h | 23
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-modes.def | 92
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386-protos.h | 280
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386.c | 30118
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386.h | 2552
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386.md | 21982
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386.opt | 352
-rw-r--r--  gcc-4.4.0/gcc/config/i386/i386elf.h | 125
-rw-r--r--  gcc-4.4.0/gcc/config/i386/immintrin.h | 59
-rw-r--r--  gcc-4.4.0/gcc/config/i386/k6.md | 267
-rw-r--r--  gcc-4.4.0/gcc/config/i386/kfreebsd-gnu.h | 25
-rw-r--r--  gcc-4.4.0/gcc/config/i386/knetbsd-gnu.h | 23
-rw-r--r--  gcc-4.4.0/gcc/config/i386/kopensolaris-gnu.h | 22
-rw-r--r--  gcc-4.4.0/gcc/config/i386/libgcc-glibc.ver | 165
-rw-r--r--  gcc-4.4.0/gcc/config/i386/linux-unwind.h | 177
-rw-r--r--  gcc-4.4.0/gcc/config/i386/linux.h | 217
-rw-r--r--  gcc-4.4.0/gcc/config/i386/linux64.h | 123
-rw-r--r--  gcc-4.4.0/gcc/config/i386/lynx.h | 90
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mach.h | 20
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mingw.opt | 23
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mingw32.h | 221
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mm3dnow.h | 215
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mmintrin-common.h | 155
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mmintrin.h | 921
-rw-r--r--  gcc-4.4.0/gcc/config/i386/mmx.md | 1628
-rw-r--r--  gcc-4.4.0/gcc/config/i386/msformat-c.c | 197
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netbsd-elf.h | 124
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netbsd.h | 72
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netbsd64.h | 72
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netware-crt0.c | 79
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netware-libgcc.c | 58
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netware-libgcc.def | 2
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netware-libgcc.exp | 83
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netware.c | 256
-rw-r--r--  gcc-4.4.0/gcc/config/i386/netware.h | 168
-rw-r--r--  gcc-4.4.0/gcc/config/i386/nmmintrin.h | 37
-rw-r--r--  gcc-4.4.0/gcc/config/i386/nto.h | 93
-rw-r--r--  gcc-4.4.0/gcc/config/i386/nwld.c | 73
-rw-r--r--  gcc-4.4.0/gcc/config/i386/nwld.h | 62
-rw-r--r--  gcc-4.4.0/gcc/config/i386/openbsd.h | 101
-rw-r--r--  gcc-4.4.0/gcc/config/i386/openbsdelf.h | 131
-rw-r--r--  gcc-4.4.0/gcc/config/i386/pentium.md | 306
-rw-r--r--  gcc-4.4.0/gcc/config/i386/pmm_malloc.h | 57
-rw-r--r--  gcc-4.4.0/gcc/config/i386/pmmintrin.h | 128
-rw-r--r--  gcc-4.4.0/gcc/config/i386/ppro.md | 758
-rw-r--r--  gcc-4.4.0/gcc/config/i386/predicates.md | 1130
-rw-r--r--  gcc-4.4.0/gcc/config/i386/rtemself.h | 32
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sfp-machine.h | 5
-rw-r--r--  gcc-4.4.0/gcc/config/i386/smmintrin.h | 724
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sol2-10.h | 124
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sol2-c1.asm | 151
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sol2-ci.asm | 40
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sol2-cn.asm | 35
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sol2-gc1.asm | 155
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sol2.h | 114
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sse.md | 11980
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sync.md | 347
-rw-r--r--  gcc-4.4.0/gcc/config/i386/sysv4.h | 132
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-crtfm | 8
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-crtpc | 16
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-crtpic | 10
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-crtstuff | 7
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-cygming | 82
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-cygwin | 30
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-darwin | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-darwin64 | 8
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-djgpp | 2
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-dw2-eh | 3
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-fprules-softfp | 6
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-gmm_malloc | 6
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-gthr-win32 | 3
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-i386 | 16
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-i386elf | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-interix | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-linux | 5
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-linux64 | 17
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-mingw32 | 5
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-netware | 9
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-nto | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-nwld | 30
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-openbsd | 6
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-pmm_malloc | 6
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-rtems-i386 | 51
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-sjlj-eh | 3
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-sol2-10 | 11
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-svr3dbx | 7
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-vxworks | 8
-rw-r--r--  gcc-4.4.0/gcc/config/i386/t-vxworksae | 5
-rw-r--r--  gcc-4.4.0/gcc/config/i386/tmmintrin.h | 244
-rw-r--r--  gcc-4.4.0/gcc/config/i386/unix.h | 76
-rw-r--r--  gcc-4.4.0/gcc/config/i386/vx-common.h | 26
-rw-r--r--  gcc-4.4.0/gcc/config/i386/vxworks.h | 78
-rw-r--r--  gcc-4.4.0/gcc/config/i386/vxworksae.h | 26
-rwxr-xr-x  gcc-4.4.0/gcc/config/i386/w32-unwind.h | 212
-rw-r--r--  gcc-4.4.0/gcc/config/i386/winnt-cxx.c | 143
-rw-r--r--  gcc-4.4.0/gcc/config/i386/winnt-stubs.c | 52
-rw-r--r--  gcc-4.4.0/gcc/config/i386/winnt.c | 644
-rw-r--r--  gcc-4.4.0/gcc/config/i386/wmmintrin.h | 120
-rw-r--r--  gcc-4.4.0/gcc/config/i386/x-cygwin | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/x-darwin | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/x-i386 | 4
-rw-r--r--  gcc-4.4.0/gcc/config/i386/x-mingw32 | 13
-rw-r--r--  gcc-4.4.0/gcc/config/i386/x86-64.h | 98
-rw-r--r--  gcc-4.4.0/gcc/config/i386/x86intrin.h | 70
-rw-r--r--  gcc-4.4.0/gcc/config/i386/xm-cygwin.h | 22
-rw-r--r--  gcc-4.4.0/gcc/config/i386/xm-djgpp.h | 84
-rw-r--r--  gcc-4.4.0/gcc/config/i386/xm-mingw32.h | 35
-rw-r--r--  gcc-4.4.0/gcc/config/i386/xmmintrin.h | 1251
154 files changed, 92628 insertions, 0 deletions
diff --git a/gcc-4.4.0/gcc/config/i386/ammintrin.h b/gcc-4.4.0/gcc/config/i386/ammintrin.h
new file mode 100644
index 000000000..3647b3193
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/ammintrin.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the AMD Programmers
+ Manual Update, version 2.x */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4A__
+# error "SSE4A instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
+#include <pmmintrin.h>
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+ __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+ __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
+}
+#else
+#define _mm_extracti_si64(X, I, L) \
+ ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), \
+ (unsigned int)(I), (unsigned int)(L)))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+ return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
+{
+ return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
+}
+#else
+#define _mm_inserti_si64(X, Y, I, L) \
+ ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (unsigned int)(I), (unsigned int)(L)))
+#endif
+
+#endif /* __SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
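
For reference, a minimal usage sketch of the SSE4A intrinsics defined above
(not part of the patch; it assumes an x86-64 compiler built from these
sources and invoked with -msse4a, and the constants are purely illustrative):

  #include <stdio.h>
  #include <ammintrin.h>

  int
  main (void)
  {
    __m128i x = _mm_set_epi64x (0, 0x1122334455667788LL);
    __m128i r;
    double d;

    /* EXTRQ: extract the 8-bit field starting at bit 16 of the low
       quadword; bits 16..23 of 0x1122334455667788 are 0x66.  */
    r = _mm_extracti_si64 (x, 8, 16);
    printf ("%#llx\n", (unsigned long long) _mm_cvtsi128_si64 (r));

    /* MOVNTSD: non-temporal store of the low double, bypassing the
       cache; fence before reading the value back.  */
    _mm_stream_sd (&d, _mm_set_sd (3.14));
    _mm_sfence ();
    printf ("%f\n", d);
    return 0;
  }
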
diff --git a/gcc-4.4.0/gcc/config/i386/athlon.md b/gcc-4.4.0/gcc/config/i386/athlon.md
new file mode 100644
index 000000000..c9860def7
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/athlon.md
@@ -0,0 +1,1168 @@
+;; AMD Athlon Scheduling
+;;
+;; The Athlon contains three pipelined FP units, three integer units and
+;; three address generation units.
+;;
+;; The predecode logic determines instruction boundaries within the 64-byte
+;; cache line, so the cache-line-straddling problem of the K6 might be an
+;; issue here as well, but it is not noted in the documentation.
+;;
+;; Three DirectPath instruction decoders and only one VectorPath decoder
+;; are available. They can decode three DirectPath instructions or one
+;; VectorPath instruction per cycle.
+;; Decoded macro instructions are then passed to the 72-entry instruction
+;; control unit, which passes them to the specialized integer (18-entry)
+;; and FP (36-entry) schedulers.
+;;
+;; The load/store queue unit is not attached to the schedulers but
+;; communicates with all the execution units separately instead.
+
+(define_attr "athlon_decode" "direct,vector,double"
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
+ (const_string "vector")
+ (and (eq_attr "type" "push")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF")))
+ (const_string "vector")]
+ (const_string "direct")))
+
+(define_attr "amdfam10_decode" "direct,vector,double"
+ (const_string "direct"))
+;;
+;; decode0 decode1 decode2
+;; \ | /
+;; instruction control unit (72 entry scheduler)
+;; | |
+;; integer scheduler (18) stack map
+;; / | | | | \ stack rename
+;; ieu0 agu0 ieu1 agu1 ieu2 agu2 scheduler
+;; | agu0 | agu1 agu2 register file
+;; | \ | | / | | |
+;; \ /\ | / fadd fmul fstore
+;; \ / \ | / fadd fmul fstore
+;; imul load/store (2x) fadd fmul fstore
+
+(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
+(define_cpu_unit "athlon-decode0" "athlon")
+(define_cpu_unit "athlon-decode1" "athlon")
+(define_cpu_unit "athlon-decode2" "athlon")
+(define_cpu_unit "athlon-decodev" "athlon")
+;; Model the fact that a double-decoded instruction may take 2 cycles
+;; to decode when decoder2, and decoder0 in the next cycle, are used
+;; (this is needed to allow a throughput of 1.5 double-decoded
+;; instructions per cycle).
+;;
+;; In order to avoid a dependence between the reservation of the decoder
+;; and the other units, we model the decoder as a two-stage fully pipelined
+;; unit, where only a double-decoded instruction may occupy the unit in the
+;; first cycle. With this scheme, however, two double instructions can be
+;; issued in cycle 0.
+;;
+;; Avoid this by using a presence set requiring decoder0 to be allocated
+;; too. Vector-decoded instructions then can't be issued when
+;; modeled as consuming decoder0+decoder1+decoder2.
+;; We solve that with a specialized vector decoder unit and an exclusion set.
+(presence_set "athlon-decode2" "athlon-decode0")
+(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
+(define_reservation "athlon-vector" "nothing,athlon-decodev")
+(define_reservation "athlon-direct0" "nothing,athlon-decode0")
+(define_reservation "athlon-direct" "nothing,
+ (athlon-decode0 | athlon-decode1
+ | athlon-decode2)")
+;; Double instructions behave like two direct instructions.
+(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
+ | (nothing,(athlon-decode0 + athlon-decode1))
+ | (nothing,(athlon-decode1 + athlon-decode2)))")
+
+;; The AGU and IEU units would result in extremely large automata, and
+;; in our approximation they are hardly ever filled. Only the IEU
+;; unit can be, as the issue rate is 3 and the AGU unit is always used
+;; first in the insn reservations. Skip those models.
+
+;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
+;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
+;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
+(define_reservation "athlon-ieu" "nothing")
+(define_cpu_unit "athlon-ieu0" "athlon")
+;(define_cpu_unit "athlon-agu0" "athlon_agu")
+;(define_cpu_unit "athlon-agu1" "athlon_agu")
+;(define_cpu_unit "athlon-agu2" "athlon_agu")
+;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
+(define_reservation "athlon-agu" "nothing")
+
+(define_cpu_unit "athlon-mult" "athlon_mult")
+
+(define_cpu_unit "athlon-load0" "athlon_load")
+(define_cpu_unit "athlon-load1" "athlon_load")
+(define_reservation "athlon-load" "athlon-agu,
+ (athlon-load0 | athlon-load1),nothing")
+;; 128-bit SSE instructions issue two loads at once
+(define_reservation "athlon-load2" "athlon-agu,
+ (athlon-load0 + athlon-load1),nothing")
+
+(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
+;; 128-bit SSE instructions issue two stores at once
+(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
+
+
+;; The FP operations start to execute at stage 12 in the pipeline, while
+;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
+;; Compensate for the difference on Athlon because it results in
+;; significantly smaller automata.
+(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
+;; The floating point loads.
+(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
+(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
+(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
+
+
+;; The three fp units are fully pipelined with latency of 3
+(define_cpu_unit "athlon-fadd" "athlon_fp")
+(define_cpu_unit "athlon-fmul" "athlon_fp")
+(define_cpu_unit "athlon-fstore" "athlon_fp")
+(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
+(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
+
+;; Vector operations usually consume many of the pipes.
+(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
+
+
+;; Jump instructions are executed in the branch unit, completely transparently to us.
+(define_insn_reservation "athlon_branch" 0
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "ibr"))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_call" 0
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "call,callv"))
+ "athlon-vector,athlon-ieu")
+(define_insn_reservation "athlon_call_amdfam10" 0
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "call,callv"))
+ "athlon-double,athlon-ieu")
+
+;; The latency of a push operation is 3 cycles, but the ESP value is
+;; available earlier.
+(define_insn_reservation "athlon_push" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "push"))
+ "athlon-direct,athlon-agu,athlon-store")
+(define_insn_reservation "athlon_pop" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "pop"))
+ "athlon-vector,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_pop_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "pop"))
+ "athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_pop_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "pop"))
+ "athlon-direct,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "leave"))
+ "athlon-vector,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_leave_k8" 3
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (eq_attr "type" "leave"))
+ "athlon-double,(athlon-ieu+athlon-load)")
+
+;; Lea executes in the AGU unit with a latency of 2 cycles.
+(define_insn_reservation "athlon_lea" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
+;; Lea executes in the AGU unit with a latency of 1 cycle on AMDFAM10.
+(define_insn_reservation "athlon_lea_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "lea"))
+ "athlon-direct,athlon-agu,nothing")
+
+;; Mul executes in a special multiplier unit attached to IEU0.
+(define_insn_reservation "athlon_imul" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
+;; ??? Widening multiply is vector or double.
+(define_insn_reservation "athlon_imul_k8_DI" 4
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_k8" 3
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_amdfam10_HI" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
+(define_insn_reservation "athlon_imul_mem" 8
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8_DI" 7
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "memory" "load,both"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
+(define_insn_reservation "athlon_imul_mem_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
+
+;; Idiv cannot execute in parallel with other instructions. Treating it
+;; as a short-latency vector instruction is a good approximation that keeps
+;; the scheduler from trying too hard to hide its latency by overlapping it
+;; with other instructions.
+;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
+;; of the other code.
+;; Use the same heuristics for amdfam10 as for K8 idiv.
+
+(define_insn_reservation "athlon_idiv" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none,unknown")))
+ "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
+(define_insn_reservation "athlon_idiv_mem" 9
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "load,both")))
+ "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
+;; The parallelism of string instructions is not documented. Model them the
+;; same way as idiv to create smaller automata. This probably does not matter much.
+;; Use the same heuristics for amdfam10 as for K8 idiv.
+(define_insn_reservation "athlon_str" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both,store")))
+ "athlon-vector,athlon-load,athlon-ieu0*6")
+
+(define_insn_reservation "athlon_idirect" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_idirect_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_ivector" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "none,unknown"))))
+ "athlon-vector,athlon-ieu,athlon-ieu")
+
+(define_insn_reservation "athlon_idirect_loadmov" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-load")
+
+(define_insn_reservation "athlon_idirect_load" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_idirect_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+
+(define_insn_reservation "athlon_idirect_movstore" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "athlon-direct,athlon-agu,athlon-store")
+
+(define_insn_reservation "athlon_idirect_both" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+(define_insn_reservation "athlon_idirect_both_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-direct,athlon-load,
+ athlon-ieu,athlon-store,
+ athlon-store")
+
+(define_insn_reservation "athlon_ivector_both" 6
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_both_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "both"))))
+ "athlon-vector,athlon-load,
+ athlon-ieu,
+ athlon-ieu,
+ athlon-store")
+
+(define_insn_reservation "athlon_idirect_store" 1
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+(define_insn_reservation "athlon_idirect_store_amdfam10" 1
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-direct,(athlon-ieu+athlon-agu),
+ athlon-store")
+
+(define_insn_reservation "athlon_ivector_store" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
+(define_insn_reservation "athlon_ivector_store_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "unit" "integer,unknown")
+ (eq_attr "memory" "store"))))
+ "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+ athlon-store")
+
+;; Athlon floating point unit
+(define_insn_reservation "athlon_fldxf" 12
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fpload2,athlon-fvector*9")
+(define_insn_reservation "athlon_fldxf_k8" 13
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
+;; Assume superforwarding takes place, so the effective latency of a fany op is 0.
+(define_insn_reservation "athlon_fld" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_fld_k8" 2
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+
+(define_insn_reservation "athlon_fstxf" 10
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
+(define_insn_reservation "athlon_fstxf_k8" 8
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "store,both")
+ (eq_attr "mode" "XF"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
+(define_insn_reservation "athlon_fst" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fst_k8" 2
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fist" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fistp,fisttp"))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_fmov" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fmov"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_fadd_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fadd_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fop")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fadd" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fop"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fmul_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fmul_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fmul" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fmul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fsgn" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fsgn"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load" 24
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_load_k8" 13
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fdiv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_fdiv" 24
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fdiv_k8" 11
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (eq_attr "type" "fdiv"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_fpspc_load" 103
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "fpspc")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fpspc" 100
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fpspc"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load" 7
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fvector")
+(define_insn_reservation "athlon_fcmov" 7
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_load_k8" 17
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fcmov")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_fcmov_k8" 15
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (eq_attr "type" "fcmov"))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+;; fcomi is vector decoded but uses only one pipe.
+(define_insn_reservation "athlon_fcomi_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcomi_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fcmp")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcomi" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "athlon_decode" "vector")
+ (eq_attr "type" "fcmp")))
+ "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_fcom_load_k8" 4
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "fcmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_fcom" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (eq_attr "type" "fcmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+;; Never seen by the scheduler because we still don't do post reg-stack
+;; scheduling.
+;(define_insn_reservation "athlon_fxch" 2
+; (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+; (eq_attr "type" "fxch"))
+; "athlon-direct,athlon-fpsched,athlon-fany")
+
+;; Athlon handles MMX operations in the FPU unit with shorter latencies.
+
+(define_insn_reservation "athlon_movlpd_load" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_movlpd_load_k8" 2
+ (and (eq_attr "cpu" "k8")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_movsd_load_generic64" 2
+ (and (eq_attr "cpu" "generic64")
+ (and (eq_attr "type" "ssemov")
+ (match_operand:DF 1 "memory_operand" "")))
+ "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
+(define_insn_reservation "athlon_movaps_load_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_movaps_load" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
+(define_insn_reservation "athlon_movss_load" 1
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-vector,athlon-fpload,(athlon-fany*2)")
+(define_insn_reservation "athlon_movss_load_k8" 1
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "SF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
+(define_insn_reservation "athlon_mmxsseld" 0
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fany")
+(define_insn_reservation "athlon_mmxsseld_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
+;; loads generated are direct path, have a latency of 2 and do not use any
+;; FP execution units. There are no separate entries for movlpx/movhpx
+;; loads, which are direct path, have a latency of 4 and use the FADD/FMUL
+;; FP execution units, as they will not be generated.
+(define_insn_reservation "athlon_sseld_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8")
+;; On AMDFAM10 MMX data loads generated are direct path, have a latency of 4
+;; and can use any FP execution unit.
+(define_insn_reservation "athlon_mmxld_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8, athlon-fany")
+(define_insn_reservation "athlon_mmxssest" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
+(define_insn_reservation "athlon_mmxssest_short" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+;; On AMDFAM10 all double, single and integer packed SSEx data stores
+;; generated are double path, have a latency of 2 and use the FSTORE FP
+;; execution unit. There are no separate entries for movupx/movdqu, which
+;; are vector path, have a latency of 3 and use the FSTORE*2 FP execution
+;; unit, as they will not be generated.
+(define_insn_reservation "athlon_ssest_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "store,both"))))
+ "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
+;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
+;; data stores generated are direct path, have a latency of 2 and use
+;; the FSTORE FP execution unit.
+(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "mmxmov,ssemov")
+ (eq_attr "memory" "store,both")))
+ "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+(define_insn_reservation "athlon_movaps_k8" 2
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF,TI")))
+ "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
+(define_insn_reservation "athlon_movaps" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemov")
+ (eq_attr "mode" "V4SF,V2DF,TI")))
+ "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
+(define_insn_reservation "athlon_mmxssemov" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "mmxmov,ssemov"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+(define_insn_reservation "athlon_mmxmul_load" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "mmxmul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_mmxmul" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "mmxmul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_mmx_load" 3
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "unit" "mmx")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fpload,athlon-faddmul")
+(define_insn_reservation "athlon_mmx" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "unit" "mmx"))
+ "athlon-direct,athlon-fpsched,athlon-faddmul")
+;; SSE operations are handled by the i387 unit as well. For scalar
+;; operations the latency is the same as for i387 operations.
+
+(define_insn_reservation "athlon_sselog_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sselog,sselog1")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
+(define_insn_reservation "athlon_sselog" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-vector,athlon-fpsched,athlon-fmul*2")
+(define_insn_reservation "athlon_sselog_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-double,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_sselog_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sselog,sselog1"))
+ "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
+
+;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
+(define_insn_reservation "athlon_ssecmp_load" 2
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp_load_k8" 4
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (and (eq_attr "mode" "SF,DF,DI,TI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmp" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "mode" "SF,DF,DI,TI")))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector_load" 3
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecmp")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecmpvector" 3
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_k8" 3
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssecmp"))
+ "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssecmp"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecomi")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (eq_attr "type" "ssecmp"))
+ "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_amdfam10" 3
+ (and (eq_attr "cpu" "amdfam10")
+;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
+ (eq_attr "type" "ssecomi"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fadd")
+(define_insn_reservation "athlon_sseadd_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "mode" "SF,DF,DI")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseadd" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "mode" "SF,DF,DI")))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_k8" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseadd")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_sseaddvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "sseadd"))
+ "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "sseadd"))
+ "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "sseadd"))
+ "athlon-direct,athlon-fpsched,athlon-fadd")
+
+;; Conversions behave very irregularly and the scheduling is critical here.
+;; Take each instruction separately. Assume that the mode is always set to the
+;; destination one and athlon_decode is set to the K8 versions.
+
+;; cvtss2sd
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (eq_attr "mode" "DF"))))
+ "athlon-direct,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (eq_attr "mode" "DF"))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
+;; cvtps2pd. Model it the same way as the other double decoded FP conversions.
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "V2DF,V4SF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (and (eq_attr "mode" "V2DF,V4SF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (eq_attr "mode" "V2DF,V4SF,TI"))))
+ "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "direct")
+ (eq_attr "mode" "V2DF,V4SF,TI"))))
+ "athlon-direct,athlon-fpsched,athlon-fstore")
+;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath);
+;; cvtsi2sd has throughput 1 and executes in the store unit with a latency of 6.
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
+ (and (eq_attr "cpu" "athlon,k8")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "direct")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsi2ss mem, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-vector,athlon-fpload,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsi2ss reg, reg is doublepath
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
+ (and (eq_attr "cpu" "k8,athlon,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "vector")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
+;; ??? Why is it faster than cvtsd2ss?
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-fvector*2")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssecvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "V4SF,V2DF,TI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
+;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9
+(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
+ (and (eq_attr "cpu" "athlon,k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "vector")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load")))))
+ "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
+;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
+(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-vector,athlon-fpsched,athlon-fvector")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "athlon_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "SI,DI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
+;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "load")))))
+ "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseicvt")
+ (and (eq_attr "amdfam10_decode" "double")
+ (and (eq_attr "mode" "TI")
+ (eq_attr "memory" "none")))))
+ "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
+
+
+(define_insn_reservation "athlon_ssemul_load" 4
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fmul")
+(define_insn_reservation "athlon_ssemul_load_k8" 6
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemul" 4
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector_load" 5
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_k8" 7
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul")
+(define_insn_reservation "athlon_ssemulvector" 5
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssemul"))
+ "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_k8" 5
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssemul"))
+ "athlon-double,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssemul"))
+ "athlon-direct,athlon-fpsched,athlon-fmul")
+;; divsd timings. divss is faster
+(define_insn_reservation "athlon_ssediv_load" 20
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fpload,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv_load_k8" 22
+ (and (eq_attr "cpu" "k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (and (eq_attr "mode" "SF,DF")
+ (eq_attr "memory" "load"))))
+ "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssediv" 20
+ (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "mode" "SF,DF")))
+ "athlon-direct,athlon-fpsched,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector_load" 39
+ (and (eq_attr "cpu" "athlon")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-vector,athlon-fpload2,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_k8" 35
+ (and (eq_attr "cpu" "k8,generic64")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "ssediv")
+ (eq_attr "memory" "load")))
+ "athlon-direct,athlon-fploadk8,athlon-fmul*17")
+(define_insn_reservation "athlon_ssedivvector" 39
+ (and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "ssediv"))
+ "athlon-vector,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_k8" 39
+ (and (eq_attr "cpu" "k8,generic64")
+ (eq_attr "type" "ssediv"))
+ "athlon-double,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
+ (and (eq_attr "cpu" "amdfam10")
+ (eq_attr "type" "ssediv"))
+ "athlon-direct,athlon-fmul*17")
+(define_insn_reservation "athlon_sseins_amdfam10" 5
+ (and (eq_attr "cpu" "amdfam10")
+ (and (eq_attr "type" "sseins")
+ (eq_attr "mode" "TI")))
+ "athlon-vector,athlon-fpsched,athlon-faddmul")
diff --git a/gcc-4.4.0/gcc/config/i386/att.h b/gcc-4.4.0/gcc/config/i386/att.h
new file mode 100644
index 000000000..5f939258c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/att.h
@@ -0,0 +1,91 @@
+/* Definitions for AT&T assembler syntax for the Intel 80386.
+ Copyright (C) 1988, 1996, 2000, 2001, 2002, 2007, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Define the syntax of instructions and addresses. */
+
+/* Prefix for internally generated assembler labels. */
+#define LPREFIX ".L"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_SHORT "\t.value\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t" /* Should not be used for 32-bit compilation. */
+
+/* How to output an ASCII string constant. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, PTR, SIZE) \
+do \
+{ size_t i = 0, limit = (SIZE); \
+ while (i < limit) \
+ { if (i%10 == 0) { if (i!=0) fprintf ((FILE), "\n"); \
+ fputs ("\t.byte\t", (FILE)); } \
+ else fprintf ((FILE), ","); \
+ fprintf ((FILE), "0x%x", ((PTR)[i++] & 0377)) ;} \
+ fprintf ((FILE), "\n"); \
+} while (0)
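+
+/* E.g. the bytes of "Hi\n" come out as (illustrative):
+ .byte 0x48,0x69,0xa
+ with a fresh .byte directive started after every 10 bytes. */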
+
+/* Output at beginning of assembler file. */
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", 1<<(LOG))
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+
+#undef ASM_OUTPUT_SKIP
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf ((FILE), "\t.set .,.+%u\n", (int)(SIZE))
+
+/* Can't use ASM_OUTPUT_SKIP in text section; it doesn't leave 0s. */
+
+#define ASM_NO_SKIP_IN_TEXT 1
+
+/* Define the syntax of labels and symbol definitions/declarations. */
+
+/* The prefix to add for compiler private assembler symbols. */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* This is how to store into the string BUF
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
+ sprintf ((BUF), "%s%s%ld", LOCAL_LABEL_PREFIX, (PREFIX), (long)(NUMBER))
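+
+/* E.g. PREFIX "LC" with NUMBER 42 yields ".LC42" in BUF. */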
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
diff --git a/gcc-4.4.0/gcc/config/i386/avxintrin.h b/gcc-4.4.0/gcc/config/i386/avxintrin.h
new file mode 100644
index 000000000..26925fd7f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/avxintrin.h
@@ -0,0 +1,1426 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 11.0. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+/* Internal data types for implementing the intrinsics. */
+typedef double __v4df __attribute__ ((__vector_size__ (32)));
+typedef float __v8sf __attribute__ ((__vector_size__ (32)));
+typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+typedef int __v8si __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m256 __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef long long __m256i __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+typedef double __m256d __attribute__ ((__vector_size__ (32),
+ __may_alias__));
+
+/* Compare predicates for scalar and packed compare intrinsics. */
+
+/* Equal (ordered, non-signaling) */
+#define _CMP_EQ_OQ 0x00
+/* Less-than (ordered, signaling) */
+#define _CMP_LT_OS 0x01
+/* Less-than-or-equal (ordered, signaling) */
+#define _CMP_LE_OS 0x02
+/* Unordered (non-signaling) */
+#define _CMP_UNORD_Q 0x03
+/* Not-equal (unordered, non-signaling) */
+#define _CMP_NEQ_UQ 0x04
+/* Not-less-than (unordered, signaling) */
+#define _CMP_NLT_US 0x05
+/* Not-less-than-or-equal (unordered, signaling) */
+#define _CMP_NLE_US 0x06
+/* Ordered (non-signaling) */
+#define _CMP_ORD_Q 0x07
+/* Equal (unordered, non-signaling) */
+#define _CMP_EQ_UQ 0x08
+/* Not-greater-than-or-equal (unordered, signaling) */
+#define _CMP_NGE_US 0x09
+/* Not-greater-than (unordered, signaling) */
+#define _CMP_NGT_US 0x0a
+/* False (ordered, non-signaling) */
+#define _CMP_FALSE_OQ 0x0b
+/* Not-equal (ordered, non-signaling) */
+#define _CMP_NEQ_OQ 0x0c
+/* Greater-than-or-equal (ordered, signaling) */
+#define _CMP_GE_OS 0x0d
+/* Greater-than (ordered, signaling) */
+#define _CMP_GT_OS 0x0e
+/* True (unordered, non-signaling) */
+#define _CMP_TRUE_UQ 0x0f
+/* Equal (ordered, signaling) */
+#define _CMP_EQ_OS 0x10
+/* Less-than (ordered, non-signaling) */
+#define _CMP_LT_OQ 0x11
+/* Less-than-or-equal (ordered, non-signaling) */
+#define _CMP_LE_OQ 0x12
+/* Unordered (signaling) */
+#define _CMP_UNORD_S 0x13
+/* Not-equal (unordered, signaling) */
+#define _CMP_NEQ_US 0x14
+/* Not-less-than (unordered, non-signaling) */
+#define _CMP_NLT_UQ 0x15
+/* Not-less-than-or-equal (unordered, non-signaling) */
+#define _CMP_NLE_UQ 0x16
+/* Ordered (signaling) */
+#define _CMP_ORD_S 0x17
+/* Equal (unordered, signaling) */
+#define _CMP_EQ_US 0x18
+/* Not-greater-than-or-equal (unordered, non-signaling) */
+#define _CMP_NGE_UQ 0x19
+/* Not-greater-than (unordered, non-signaling) */
+#define _CMP_NGT_UQ 0x1a
+/* False (ordered, signaling) */
+#define _CMP_FALSE_OS 0x1b
+/* Not-equal (ordered, signaling) */
+#define _CMP_NEQ_OS 0x1c
+/* Greater-than-or-equal (ordered, non-signaling) */
+#define _CMP_GE_OQ 0x1d
+/* Greater-than (ordered, non-signaling) */
+#define _CMP_GT_OQ 0x1e
+/* True (unordered, signaling) */
+#define _CMP_TRUE_US 0x1f
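+
+/* Usage sketch (illustrative): with AVX enabled,
+ __m256d __m = _mm256_cmp_pd (__a, __b, _CMP_LT_OQ);
+ sets each 64-bit element of __m to all-ones where the corresponding
+ element of __a compares less-than (ordered, non-signaling), and to
+ all-zeros elsewhere. */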
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_add_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_addsubpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_addsub_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_addsubps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_and_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_andnpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_andnot_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_andnps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+/* Double/single precision floating point blend instructions - select
+ data from two sources using a constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_pd (__m256d __X, __m256d __Y, const int __M)
+{
+ return (__m256d) __builtin_ia32_blendpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blend_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_blendps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_blend_pd(X, Y, M) \
+ ((__m256d) __builtin_ia32_blendpd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(M)))
+
+#define _mm256_blend_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_blendps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
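+
+/* In the constant-mask forms, bit I of M selects result element I:
+ 1 takes it from Y, 0 from X (four mask bits for _pd, eight for _ps).
+ E.g. _mm256_blend_pd (__x, __y, 0x5) gives { y0, x1, y2, x3 }. */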
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_pd (__m256d __X, __m256d __Y, __m256d __M)
+{
+ return (__m256d) __builtin_ia32_blendvpd256 ((__v4df)__X,
+ (__v4df)__Y,
+ (__v4df)__M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_blendv_ps (__m256 __X, __m256 __Y, __m256 __M)
+{
+ return (__m256) __builtin_ia32_blendvps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ (__v8sf)__M);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_divpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_div_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_divps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing of
+ parts of the result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dp_ps (__m256 __X, __m256 __Y, const int __M)
+{
+ return (__m256) __builtin_ia32_dpps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __M);
+}
+#else
+#define _mm256_dp_ps(X, Y, M) \
+ ((__m256) __builtin_ia32_dpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(M)))
+#endif
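+
+/* The mask works within each 128-bit lane: its high four bits choose
+ which element products enter the sum, and its low four bits choose
+ which result slots receive that sum (the others are zeroed). */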
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_pd (__m256d __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_haddpd256 ((__v4df)__X, (__v4df)__Y);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hadd_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_haddps256 ((__v8sf)__X, (__v8sf)__Y);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_pd (__m256d __X, __m256d __Y)
+{
+ return (__m256d) __builtin_ia32_hsubpd256 ((__v4df)__X, (__v4df)__Y);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_hsub_ps (__m256 __X, __m256 __Y)
+{
+ return (__m256) __builtin_ia32_hsubps256 ((__v8sf)__X, (__v8sf)__Y);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_maxpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_max_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_maxps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_minpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_min_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_minps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_mulpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mul_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_mulps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_orpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_or_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_orps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_pd (__m256d __A, __m256d __B, const int __mask)
+{
+ return (__m256d) __builtin_ia32_shufpd256 ((__v4df)__A, (__v4df)__B,
+ __mask);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_shuffle_ps (__m256 __A, __m256 __B, const int __mask)
+{
+ return (__m256) __builtin_ia32_shufps256 ((__v8sf)__A, (__v8sf)__B,
+ __mask);
+}
+#else
+#define _mm256_shuffle_pd(A, B, N) \
+ ((__m256d)__builtin_ia32_shufpd256 ((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), (int)(N)))
+
+#define _mm256_shuffle_ps(A, B, N) \
+ ((__m256) __builtin_ia32_shufps256 ((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), (int)(N)))
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_subpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sub_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_subps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_xorpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_xor_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_xorps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
+{
+ return (__m256d) __builtin_ia32_cmppd256 ((__v4df)__X, (__v4df)__Y,
+ __P);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
+{
+ return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
+ __P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+#else
+#define _mm_cmp_pd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ps(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+
+#define _mm256_cmp_pd(X, Y, P) \
+ ((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), (int)(P)))
+
+#define _mm256_cmp_ps(X, Y, P) \
+ ((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), (int)(P)))
+
+#define _mm_cmp_sd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_ss(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_pd (__m128i __A)
+{
+ return (__m256d)__builtin_ia32_cvtdq2pd256 ((__v4si) __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtepi32_ps (__m256i __A)
+{
+ return (__m256)__builtin_ia32_cvtdq2ps256 ((__v8si) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_ps (__m256d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps256 ((__v4df) __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_epi32 (__m256 __A)
+{
+ return (__m256i)__builtin_ia32_cvtps2dq256 ((__v8sf) __A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtps_pd (__m128 __A)
+{
+ return (__m256d)__builtin_ia32_cvtps2pd256 ((__v4sf) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttpd_epi32 (__m256d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq256 ((__v4df) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtpd_epi32 (__m256d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq256 ((__v4df) __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvttps_epi32 (__m256 __A)
+{
+ return (__m256i)__builtin_ia32_cvttps2dq256 ((__v8sf) __A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_pd (__m256d __X, const int __N)
+{
+ return (__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)__X, __N);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_ps (__m256 __X, const int __N)
+{
+ return (__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)__X, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extractf128_si256 (__m256i __X, const int __N)
+{
+ return (__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi32 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ return _mm_extract_epi32 (__Y, __N % 4);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi16 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ return _mm_extract_epi16 (__Y, __N % 8);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi8 (__m256i __X, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ return _mm_extract_epi8 (__Y, __N % 16);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_extract_epi64 (__m256i __X, const int __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ return _mm_extract_epi64 (__Y, __N % 2);
+}
+#endif
+#else
+#define _mm256_extractf128_pd(X, N) \
+ ((__m128d) __builtin_ia32_vextractf128_pd256 ((__v4df)(__m256d)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_ps(X, N) \
+ ((__m128) __builtin_ia32_vextractf128_ps256 ((__v8sf)(__m256)(X), \
+ (int)(N)))
+
+#define _mm256_extractf128_si256(X, N) \
+ ((__m128i) __builtin_ia32_vextractf128_si256 ((__v8si)(__m256i)(X), \
+ (int)(N)))
+
+#define _mm256_extract_epi32(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ _mm_extract_epi32 (__Y, (N) % 4); \
+ }))
+
+#define _mm256_extract_epi16(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ _mm_extract_epi16 (__Y, (N) % 8); \
+ }))
+
+#define _mm256_extract_epi8(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ _mm_extract_epi8 (__Y, (N) % 16); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_extract_epi64(X, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ _mm_extract_epi64 (__Y, (N) % 2); \
+ }))
+#endif
+#endif
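+
+/* The index math splits N into a 128-bit half plus a position within
+ it; e.g. _mm256_extract_epi32 with N == 5 uses N >> 2 == 1 (upper
+ half) and N % 4 == 1 (its second element). */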
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroall (void)
+{
+ __builtin_ia32_vzeroall ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_zeroupper (void)
+{
+ __builtin_ia32_vzeroupper ();
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_pd (__m128d __A, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_vpermilvarpd ((__v2df)__A,
+ (__v2di)__C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_pd (__m256d __A, __m256i __C)
+{
+ return (__m256d) __builtin_ia32_vpermilvarpd256 ((__v4df)__A,
+ (__v4di)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permutevar_ps (__m128 __A, __m128i __C)
+{
+ return (__m128) __builtin_ia32_vpermilvarps ((__v4sf)__A,
+ (__v4si)__C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permutevar_ps (__m256 __A, __m256i __C)
+{
+ return (__m256) __builtin_ia32_vpermilvarps256 ((__v8sf)__A,
+ (__v8si)__C);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_pd (__m128d __X, const int __C)
+{
+ return (__m128d) __builtin_ia32_vpermilpd ((__v2df)__X, __C);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_pd (__m256d __X, const int __C)
+{
+ return (__m256d) __builtin_ia32_vpermilpd256 ((__v4df)__X, __C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_permute_ps (__m128 __X, const int __C)
+{
+ return (__m128) __builtin_ia32_vpermilps ((__v4sf)__X, __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute_ps (__m256 __X, const int __C)
+{
+ return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
+}
+#else
+#define _mm_permute_pd(X, C) \
+ ((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
+
+#define _mm256_permute_pd(X, C) \
+ ((__m256d) __builtin_ia32_vpermilpd256 ((__v4df)(__m256d)(X), (int)(C)))
+
+#define _mm_permute_ps(X, C) \
+ ((__m128) __builtin_ia32_vpermilps ((__v4sf)(__m128)(X), (int)(C)))
+
+#define _mm256_permute_ps(X, C) \
+ ((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
+#endif
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_pd (__m256d __X, __m256d __Y, const int __C)
+{
+ return (__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)__X,
+ (__v4df)__Y,
+ __C);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_ps (__m256 __X, __m256 __Y, const int __C)
+{
+ return (__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)__X,
+ (__v8sf)__Y,
+ __C);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_permute2f128_si256 (__m256i __X, __m256i __Y, const int __C)
+{
+ return (__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)__X,
+ (__v8si)__Y,
+ __C);
+}
+#else
+#define _mm256_permute2f128_pd(X, Y, C) \
+ ((__m256d) __builtin_ia32_vperm2f128_pd256 ((__v4df)(__m256d)(X), \
+ (__v4df)(__m256d)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_ps(X, Y, C) \
+ ((__m256) __builtin_ia32_vperm2f128_ps256 ((__v8sf)(__m256)(X), \
+ (__v8sf)(__m256)(Y), \
+ (int)(C)))
+
+#define _mm256_permute2f128_si256(X, Y, C) \
+ ((__m256i) __builtin_ia32_vperm2f128_si256 ((__v8si)(__m256i)(X), \
+ (__v8si)(__m256i)(Y), \
+ (int)(C)))
+#endif
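+
+/* In the control byte C, bits 1:0 pick the low 128-bit lane of the
+ result (0/1 = low/high lane of X, 2/3 = low/high lane of Y) and
+ bits 5:4 pick the high lane; setting bit 3 or bit 7 zeroes the
+ corresponding result lane instead. */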
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_broadcast_ss (float const *__X)
+{
+ return (__m128) __builtin_ia32_vbroadcastss (__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_sd (double const *__X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastsd256 (__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ss (float const *__X)
+{
+ return (__m256) __builtin_ia32_vbroadcastss256 (__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_pd (__m128d const *__X)
+{
+ return (__m256d) __builtin_ia32_vbroadcastf128_pd256 (__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_broadcast_ps (__m128 const *__X)
+{
+ return (__m256) __builtin_ia32_vbroadcastf128_ps256 (__X);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_pd (__m256d __X, __m128d __Y, const int __O)
+{
+ return (__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)__X,
+ (__v2df)__Y,
+ __O);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_ps (__m256 __X, __m128 __Y, const int __O)
+{
+ return (__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)__X,
+ (__v4sf)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insertf128_si256 (__m256i __X, __m128i __Y, const int __O)
+{
+ return (__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)__X,
+ (__v4si)__Y,
+ __O);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi32 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 2);
+ __Y = _mm_insert_epi32 (__Y, __D, __N % 4);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 2);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi16 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 3);
+ __Y = _mm_insert_epi16 (__Y, __D, __N % 8);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 3);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi8 (__m256i __X, int __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 4);
+ __Y = _mm_insert_epi8 (__Y, __D, __N % 16);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 4);
+}
+
+#ifdef __x86_64__
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_insert_epi64 (__m256i __X, long long __D, int const __N)
+{
+ __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1);
+ __Y = _mm_insert_epi64 (__Y, __D, __N % 2);
+ return _mm256_insertf128_si256 (__X, __Y, __N >> 1);
+}
+#endif
+#else
+#define _mm256_insertf128_pd(X, Y, O) \
+ ((__m256d) __builtin_ia32_vinsertf128_pd256 ((__v4df)(__m256d)(X), \
+ (__v2df)(__m128d)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_ps(X, Y, O) \
+ ((__m256) __builtin_ia32_vinsertf128_ps256 ((__v8sf)(__m256)(X), \
+ (__v4sf)(__m128)(Y), \
+ (int)(O)))
+
+#define _mm256_insertf128_si256(X, Y, O) \
+ ((__m256i) __builtin_ia32_vinsertf128_si256 ((__v8si)(__m256i)(X), \
+ (__v4si)(__m128i)(Y), \
+ (int)(O)))
+
+#define _mm256_insert_epi32(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 2); \
+ __Y = _mm_insert_epi32 (__Y, (D), (N) % 4); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 2); \
+ }))
+
+#define _mm256_insert_epi16(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 3); \
+ __Y = _mm_insert_epi16 (__Y, (D), (N) % 8); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 3); \
+ }))
+
+#define _mm256_insert_epi8(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 4); \
+ __Y = _mm_insert_epi8 (__Y, (D), (N) % 16); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 4); \
+ }))
+
+#ifdef __x86_64__
+#define _mm256_insert_epi64(X, D, N) \
+ (__extension__ \
+ ({ \
+ __m128i __Y = _mm256_extractf128_si256 ((X), (N) >> 1); \
+ __Y = _mm_insert_epi64 (__Y, (D), (N) % 2); \
+ _mm256_insertf128_si256 ((X), __Y, (N) >> 1); \
+ }))
+#endif
+#endif
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_pd (double const *__P)
+{
+ return *(__m256d *)__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_pd (double *__P, __m256d __A)
+{
+ *(__m256d *)__P = __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_ps (float const *__P)
+{
+ return *(__m256 *)__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_ps (float *__P, __m256 __A)
+{
+ *(__m256 *)__P = __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_pd (double const *__P)
+{
+ return (__m256d) __builtin_ia32_loadupd256 (__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_pd (double *__P, __m256d __A)
+{
+ __builtin_ia32_storeupd256 (__P, (__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_ps (float const *__P)
+{
+ return (__m256) __builtin_ia32_loadups256 (__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_storeups256 (__P, (__v8sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_load_si256 (__m256i const *__P)
+{
+ return *__P;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_store_si256 (__m256i *__P, __m256i __A)
+{
+ *__P = __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_loaddqu256 ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_storeu_si256 (__m256i *__P, __m256i __A)
+{
+ __builtin_ia32_storedqu256 ((char *)__P, (__v32qi)__A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_pd (double const *__P, __m128d __M)
+{
+ return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
+ (__v2df)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_pd (double *__P, __m128d __M, __m128d __A)
+{
+ __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2df)__M, (__v2df)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_pd (double const *__P, __m256d __M)
+{
+ return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
+ (__v4df)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_pd (double *__P, __m256d __M, __m256d __A)
+{
+ __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4df)__M, (__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskload_ps (float const *__P, __m128 __M)
+{
+ return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
+ (__v4sf)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskstore_ps (float *__P, __m128 __M, __m128 __A)
+{
+ __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4sf)__M, (__v4sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskload_ps (float const *__P, __m256 __M)
+{
+ return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
+ (__v8sf)__M);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskstore_ps (float *__P, __m256 __M, __m256 __A)
+{
+ __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8sf)__M, (__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movehdup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movshdup256 ((__v8sf)__X);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_moveldup_ps (__m256 __X)
+{
+ return (__m256) __builtin_ia32_movsldup256 ((__v8sf)__X);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movedup_pd (__m256d __X)
+{
+ return (__m256d) __builtin_ia32_movddup256 ((__v4df)__X);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_lddqu_si256 (__m256i const *__P)
+{
+ return (__m256i) __builtin_ia32_lddqu256 ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_si256 (__m256i *__A, __m256i __B)
+{
+ __builtin_ia32_movntdq256 ((__v4di *)__A, (__v4di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_pd (double *__A, __m256d __B)
+{
+ __builtin_ia32_movntpd256 (__A, (__v4df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_stream_ps (float *__P, __m256 __A)
+{
+ __builtin_ia32_movntps256 (__P, (__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rcpps256 ((__v8sf)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rsqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_rsqrtps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_pd (__m256d __A)
+{
+ return (__m256d) __builtin_ia32_sqrtpd256 ((__v4df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_sqrt_ps (__m256 __A)
+{
+ return (__m256) __builtin_ia32_sqrtps256 ((__v8sf)__A);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_pd (__m256d __V, const int __M)
+{
+ return (__m256d) __builtin_ia32_roundpd256 ((__v4df)__V, __M);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_round_ps (__m256 __V, const int __M)
+{
+ return (__m256) __builtin_ia32_roundps256 ((__v8sf)__V, __M);
+}
+#else
+#define _mm256_round_pd(V, M) \
+ ((__m256d) __builtin_ia32_roundpd256 ((__v4df)(__m256d)(V), (int)(M)))
+
+#define _mm256_round_ps(V, M) \
+ ((__m256) __builtin_ia32_roundps256 ((__v8sf)(__m256)(V), (int)(M)))
+#endif
+
+#define _mm256_ceil_pd(V) _mm256_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_pd(V) _mm256_round_pd ((V), _MM_FROUND_FLOOR)
+#define _mm256_ceil_ps(V) _mm256_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm256_floor_ps(V) _mm256_round_ps ((V), _MM_FROUND_FLOOR)
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpckhpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_pd (__m256d __A, __m256d __B)
+{
+ return (__m256d) __builtin_ia32_unpcklpd256 ((__v4df)__A, (__v4df)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpackhi_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpckhps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_unpacklo_ps (__m256 __A, __m256 __B)
+{
+ return (__m256) __builtin_ia32_unpcklps256 ((__v8sf)__A, (__v8sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestzpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_pd (__m128d __M, __m128d __V)
+{
+ return __builtin_ia32_vtestnzcpd ((__v2df)__M, (__v2df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestzps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_ps (__m128 __M, __m128 __V)
+{
+ return __builtin_ia32_vtestnzcps ((__v4sf)__M, (__v4sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestzpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_pd (__m256d __M, __m256d __V)
+{
+ return __builtin_ia32_vtestnzcpd256 ((__v4df)__M, (__v4df)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestzps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_ps (__m256 __M, __m256 __V)
+{
+ return __builtin_ia32_vtestnzcps256 ((__v8sf)__M, (__v8sf)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testz_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestz256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestc256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_testnzc_si256 (__m256i __M, __m256i __V)
+{
+ return __builtin_ia32_ptestnzc256 ((__v4di)__M, (__v4di)__V);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_pd (__m256d __A)
+{
+ return __builtin_ia32_movmskpd256 ((__v4df)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_movemask_ps (__m256 __A)
+{
+ return __builtin_ia32_movmskps256 ((__v8sf)__A);
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_pd (void)
+{
+ return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_ps (void)
+{
+ return __extension__ (__m256){ 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0 };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setzero_si256 (void)
+{
+ return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
+}
+
+/* Create the vector [A B C D]. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_pd (double __A, double __B, double __C, double __D)
+{
+ return __extension__ (__m256d){ __D, __C, __B, __A };
+}
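+
+/* Note the reversed initializer above: __A becomes the highest
+ element, so _mm256_set_pd (3.0, 2.0, 1.0, 0.0) puts 0.0 in element 0
+ and 3.0 in element 3. */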
+
+/* Create the vector [A B C D E F G H]. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return __extension__ (__m256){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+/* Create the vector [A B C D E F G H]. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return __extension__ (__m256i)(__v8si){ __H, __G, __F, __E,
+ __D, __C, __B, __A };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return __extension__ (__m256i)(__v16hi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m256i)(__v32qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
+ };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return __extension__ (__m256i)(__v4di){ __D, __C, __B, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pd (double __A)
+{
+ return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_ps (float __A)
+{
+ return __extension__ (__m256){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+/* Create a vector with all elements equal to A. */
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi32 (int __A)
+{
+ return __extension__ (__m256i)(__v8si){ __A, __A, __A, __A,
+ __A, __A, __A, __A };
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi16 (short __A)
+{
+ return _mm256_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi8 (char __A)
+{
+ return _mm256_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_epi64x (long long __A)
+{
+ return __extension__ (__m256i)(__v4di){ __A, __A, __A, __A };
+}
+
+/* Create vectors with elements in reversed order relative to the
+ _mm256_set_XXX functions. */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_pd (double __A, double __B, double __C, double __D)
+{
+ return _mm256_set_pd (__D, __C, __B, __A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_ps (float __A, float __B, float __C, float __D,
+ float __E, float __F, float __G, float __H)
+{
+ return _mm256_set_ps (__H, __G, __F, __E, __D, __C, __B, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi32 (int __A, int __B, int __C, int __D,
+ int __E, int __F, int __G, int __H)
+{
+ return _mm256_set_epi32 (__H, __G, __F, __E, __D, __C, __B, __A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi16 (short __q15, short __q14, short __q13, short __q12,
+ short __q11, short __q10, short __q09, short __q08,
+ short __q07, short __q06, short __q05, short __q04,
+ short __q03, short __q02, short __q01, short __q00)
+{
+ return _mm256_set_epi16 (__q00, __q01, __q02, __q03,
+ __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11,
+ __q12, __q13, __q14, __q15);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi8 (char __q31, char __q30, char __q29, char __q28,
+ char __q27, char __q26, char __q25, char __q24,
+ char __q23, char __q22, char __q21, char __q20,
+ char __q19, char __q18, char __q17, char __q16,
+ char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return _mm256_set_epi8 (__q00, __q01, __q02, __q03,
+ __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11,
+ __q12, __q13, __q14, __q15,
+ __q16, __q17, __q18, __q19,
+ __q20, __q21, __q22, __q23,
+ __q24, __q25, __q26, __q27,
+ __q28, __q29, __q30, __q31);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_setr_epi64x (long long __A, long long __B, long long __C,
+ long long __D)
+{
+ return _mm256_set_epi64x (__D, __C, __B, __A);
+}
+
+/* Casts between various SP, DP, INT vector types. Note that these do
+ no conversion of values; they just change the type. */
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_ps (__m256d __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd_si256 (__m256d __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_pd (__m256 __A)
+{
+ return (__m256d) __A;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps_si256(__m256 __A)
+{
+ return (__m256i) __A;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_ps (__m256i __A)
+{
+ return (__m256) __A;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_pd (__m256i __A)
+{
+ return (__m256d) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd256_pd128 (__m256d __A)
+{
+ return (__m128d) __builtin_ia32_pd_pd256 ((__v4df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps256_ps128 (__m256 __A)
+{
+ return (__m128) __builtin_ia32_ps_ps256 ((__v8sf)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi256_si128 (__m256i __A)
+{
+ return (__m128i) __builtin_ia32_si_si256 ((__v8si)__A);
+}
+
+/* When a cast is done from a 128-bit to a 256-bit type, the low 128
+ bits of the 256-bit result contain the source parameter value and
+ the upper 128 bits of the result are undefined. These intrinsics
+ shouldn't generate any extra moves. */
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castpd128_pd256 (__m128d __A)
+{
+ return (__m256d) __builtin_ia32_pd256_pd ((__v2df)__A);
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castps128_ps256 (__m128 __A)
+{
+ return (__m256) __builtin_ia32_ps256_ps ((__v4sf)__A);
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_castsi128_si256 (__m128i __A)
+{
+ return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
+}
diff --git a/gcc-4.4.0/gcc/config/i386/biarch64.h b/gcc-4.4.0/gcc/config/i386/biarch64.h
new file mode 100644
index 000000000..629ec980d
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/biarch64.h
@@ -0,0 +1,29 @@
+/* Make the configure machinery produce a biarch compiler defaulting to
+ 64-bit mode. This file must be included first, while the OS-specific
+ file is included later to override otherwise-wrong defaults.
+ Copyright (C) 2001, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bo Thorsen <bo@suse.de>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_64BIT_DEFAULT OPTION_MASK_ISA_64BIT
+#define TARGET_BI_ARCH 1
diff --git a/gcc-4.4.0/gcc/config/i386/bmmintrin.h b/gcc-4.4.0/gcc/config/i386/bmmintrin.h
new file mode 100644
index 000000000..e92768c25
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/bmmintrin.h
@@ -0,0 +1,1261 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _BMMINTRIN_H_INCLUDED
+#define _BMMINTRIN_H_INCLUDED
+
+#ifndef __SSE5__
+# error "SSE5 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
+#include <ammintrin.h>
+#include <mmintrin-common.h>
+
+/* Floating point multiply/add type instructions */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fnmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fnmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fnmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fnmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fnmsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fnmsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
+{
+ return (__m128) __builtin_ia32_fnmsubss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
+{
+ return (__m128d) __builtin_ia32_fnmsubsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
+}
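+
+/* Illustrative usage (a sketch; assumes -msse5 -- SSE5 was an AMD
+ proposal later reworked into FMA4/XOP, so this header targets hardware
+ that never shipped in this exact form):
+
+ __m128 fma_example (__m128 a, __m128 b, __m128 c) // hypothetical helper
+ {
+ return _mm_macc_ps (a, b, c); // per element: a * b + c
+ }
+
+ The msub, nmacc and nmsub variants compute a*b - c, -(a*b) + c and
+ -(a*b) - c respectively. */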
+
+/* Integer multiply/add instructions. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
+}
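+
+/* Illustrative semantics (a sketch, with a and b any __m128i values):
+ the trailing `s' marks saturating forms. _mm_maccs_epi16 multiplies
+ corresponding signed 16-bit lanes of __A and __B, adds the products to
+ __C and saturates; _mm_macc_epi16 wraps instead:
+
+ __m128i acc = _mm_setzero_si128 ();
+ acc = _mm_maccs_epi16 (a, b, acc); // acc[i] = sat16 (a[i]*b[i] + acc[i])
+*/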
+
+/* Packed Integer Horizontal Add and Subtract */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddbd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddbq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phadddq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddw_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddubw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddubd ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddubq ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddd_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phadduwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phadduwq ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_haddq_epu32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phaddudq ((__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubw_epi8(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phsubbw ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubd_epi16(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phsubwd ((__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubq_epi32(__m128i __A)
+{
+ return (__m128i) __builtin_ia32_phsubdq ((__v4si)__A);
+}
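+
+/* Illustrative semantics (a sketch): these horizontal operations widen
+ as they reduce. _mm_haddw_epi8, for instance, adds adjacent pairs of
+ signed bytes and produces eight signed 16-bit sums:
+
+ __m128i bytes = _mm_set1_epi8 (100);
+ __m128i sums = _mm_haddw_epi8 (bytes); // eight lanes, each 200
+
+ The epu variants treat their inputs as unsigned. */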
+
+/* Vector conditional move and permute */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pcmov (__A, __B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
+{
+ return (__m128i) __builtin_ia32_pperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_ps(__m128 __A, __m128 __B, __m128i __C)
+{
+ return (__m128) __builtin_ia32_permps ((__m128)__A, (__m128)__B, (__v16qi)__C);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_perm_pd(__m128d __A, __m128d __B, __m128i __C)
+{
+ return (__m128d) __builtin_ia32_permpd ((__m128d)__A, (__m128d)__B, (__v16qi)__C);
+}
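+
+/* Illustrative semantics (a sketch): pcmov is a bitwise select; each
+ result bit comes from __A where the corresponding bit of __C is set
+ and from __B where it is clear, i.e. (A & C) | (B & ~C). A common use
+ is branchless blending with a comparison mask:
+
+ __m128i mask = _mm_comgt_epi32 (x, y); // lane = all-ones where x > y
+ __m128i best = _mm_cmov_si128 (x, y, mask); // per-lane signed max
+*/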
+
+/* Packed Integer Rotates and Shifts */
+
+/* Rotates - Non-Immediate form */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_protb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_protw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_protd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rot_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_protq ((__v2di)__A, (__v2di)__B);
+}
+
+
+/* Rotates - Immediate form */
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi8(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_protbi ((__v16qi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi16(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_protwi ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi32(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_protdi ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_roti_epi64(__m128i __A, const int __B)
+{
+ return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B);
+}
+#else
+#define _mm_roti_epi8(A, N) \
+ ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi16(A, N) \
+ ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi32(A, N) \
+ ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(N)))
+#define _mm_roti_epi64(A, N) \
+ ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(N)))
+#endif
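+
+/* The split above exists because the rotate count must reach the
+ builtin as a compile-time constant: with optimization the
+ always-inline functions fold their `const int' argument, while at -O0
+ the macro forms substitute the literal directly. Either way the call
+ site looks the same (a sketch):
+
+ __m128i r = _mm_roti_epi32 (v, 5); // rotate each dword left by 5
+*/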
+
+/* pshl */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshlb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshlw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshld ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shl_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshlq ((__v2di)__A, (__v2di)__B);
+}
+
+/* psha */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshab ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshaw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshad ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sha_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pshaq ((__v2di)__A, (__v2di)__B);
+}
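+
+/* Illustrative semantics (a sketch): unlike the SSE2 shifts, pshl/psha
+ take a per-element shift count; positive counts shift left, negative
+ counts shift right. pshl shifts logically (zero fill) while psha
+ shifts arithmetically (sign fill) on right shifts:
+
+ __m128i counts = _mm_set1_epi32 (-1);
+ __m128i logic = _mm_shl_epi32 (v, counts); // v >> 1, zero filled
+ __m128i arith = _mm_sha_epi32 (v, counts); // v >> 1, sign filled
+*/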
+
+/* Compare and Predicate Generation */
+
+/* com (floating point, packed single) */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comeqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comunord_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comunordps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comuneqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnlt_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comunltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnle_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comunleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comord_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comordps ((__v4sf)__A, (__v4sf)__B);
+}
+
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comueq_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comueqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnge_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comungeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comngt_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comungtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comfalseps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comoneq_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comneqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comgeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comgtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_ps(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comtrueps ((__v4sf)__A, (__v4sf)__B);
+}
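+
+/* Illustrative semantics (a sketch): despite returning __m128, these
+ comparisons produce a mask, not arithmetic values; each lane is
+ all-ones where the predicate holds and all-zeros elsewhere. The
+ builtins spelled with a `u' (comueq, comunlt, ...) also hold when
+ either operand is a NaN, while the ordered forms are false on NaN:
+
+ __m128 m = _mm_comlt_ps (a, b); // lane = ~0 where a < b, else 0
+*/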
+
+/* com (floating point, packed double) */
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comeqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comlepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comunord_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comunordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comuneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnlt_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comunltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnle_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comunlepd ((__v2df)__A, (__v2df)__B);
+}
+
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comord_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comueq_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comueqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnge_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comungepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comngt_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comungtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comfalsepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comoneq_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comgepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comgtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_pd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comtruepd ((__v2df)__A, (__v2df)__B);
+}
+
+/* com (floating point, scalar single) */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comeqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comless ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comunord_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comunordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comuneqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnlt_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comunltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnle_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comunless ((__v4sf)__A, (__v4sf)__B);
+}
+
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comord_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comueq_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comueqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnge_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comungess ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comngt_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comungtss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comfalsess ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comoneq_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comneqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comgess ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comgtss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_ss(__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_comtruess ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* com (floating point, scalar double) */
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comeqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comlesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comunord_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comunordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comuneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnlt_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comunltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnle_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comunlesd ((__v2df)__A, (__v2df)__B);
+}
+
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comord_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comueq_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comueqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comnge_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comungesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comngt_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comungtsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comfalsesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comoneq_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comgesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comgtsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_sd(__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_comtruesd ((__v2df)__A, (__v2df)__B);
+}
+
+
+/* pcom (integer, unsigned bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomleub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgeub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomnequb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalseub ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrueub ((__v16qi)__A, (__v16qi)__B);
+}
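+
+/* Illustrative usage (a sketch): the pcom family supplies the unsigned
+ and full-predicate comparisons that SSE2 lacks, so an unsigned
+ per-byte x > y test needs no sign-bias trick here:
+
+ __m128i gt = _mm_comgt_epu8 (x, y); // byte lanes: ~0 where x > y
+*/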
+
+/* pcom (integer, unsigned words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomleuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgeuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomnequw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalseuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrueuw ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, unsigned double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomleud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgeud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomnequd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalseud ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrueud ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, unsigned quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomleuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgeuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomnequq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalseuq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epu64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrueuq ((__v2di)__A, (__v2di)__B);
+}
+
+/* pcom (integer, signed bytes) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomleb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgeb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomeqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomneqb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalseb ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi8(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrueb ((__v16qi)__A, (__v16qi)__B);
+}
+
+/* pcom (integer, signed words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomlew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomeqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomneqw ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalsew ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi16(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtruew ((__v8hi)__A, (__v8hi)__B);
+}
+
+/* pcom (integer, signed double words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomled ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomged ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomeqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomneqd ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalsed ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi32(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrued ((__v4si)__A, (__v4si)__B);
+}
+
+/* pcom (integer, signed quad words) */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comlt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomltq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comle_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomleq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comgt_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgtq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comge_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomgeq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comeq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomeqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comneq_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomneqq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comfalse_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomfalseq ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comtrue_epi64(__m128i __A, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_pcomtrueq ((__v2di)__A, (__v2di)__B);
+}
+
+/* FRCZ */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_frczps ((__v4sf)__A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_pd (__m128d __A)
+{
+ return (__m128d) __builtin_ia32_frczpd ((__v2df)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_frczss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_frcz_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_frczsd ((__v2df)__A, (__v2df)__B);
+}
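+
+/* Illustrative semantics (a sketch): FRCZ extracts the fractional part
+ of each element, i.e. x - trunc(x), so 2.75 maps to 0.75 and -2.75 to
+ -0.75:
+
+ __m128 f = _mm_frcz_ps (_mm_set1_ps (2.75f)); // all lanes 0.75
+*/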
+
+#endif /* __SSE5__ */
+
+#endif /* _BMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/bsd.h b/gcc-4.4.0/gcc/config/i386/bsd.h
new file mode 100644
index 000000000..229777a46
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/bsd.h
@@ -0,0 +1,99 @@
+/* Definitions for BSD assembler syntax for Intel 386
+ (actually AT&T syntax for insns and operands,
+ adapted to BSD conventions for symbol names and debugging.)
+ Copyright (C) 1988, 1996, 2000, 2002, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use the Sequent Symmetry assembler syntax. */
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* Prefix for internally generated assembler labels. If we aren't using
+ underscores, we are using prefix `.'s to identify labels that should
+ be ignored, as in `i386/gas.h' --karl@cs.umb.edu */
+
+#define LPREFIX "L"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_SHORT "\t.word\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t" /* Should not be used for 32bit compilation. */
+
+/* This was suggested, but it shouldn't be right for DBX output. -- RMS
+ #define ASM_OUTPUT_SOURCE_FILENAME(FILE, NAME) */
+
+
+/* Define the syntax of labels and symbol definitions/declarations. */
+
+/* This is how to output an assembler line
+ that says to advance the location counter by SIZE bytes. */
+
+#define ASM_OUTPUT_SKIP(FILE,SIZE) \
+ fprintf (FILE, "\t.space "HOST_WIDE_INT_PRINT_UNSIGNED"\n", (SIZE))
+
+/* Define the syntax of labels and symbol definitions/declarations. */
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u\n", (int)(ROUNDED)))
+
+#ifdef HAVE_GAS_LCOMM_WITH_ALIGNMENT
+#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGNMENT) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ",%u,%u\n", (int)(SIZE), (int)(ALIGNMENT) / BITS_PER_UNIT))
+#endif
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", (LOG))
+
+/* This is how to store into the string BUF
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
+ sprintf ((BUF), "*%s%ld", (PREFIX), (long)(NUMBER))
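+
+/* Illustrative expansion (a sketch): class "L" with number 42 yields
+ "*L42"; the leading `*' tells assemble_name to emit the name
+ verbatim, without prepending USER_LABEL_PREFIX:
+
+ char buf[32];
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", 42); // buf == "*L42"
+*/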
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#define USER_LABEL_PREFIX "_"
+
+/* Sequent has some changes in the format of DBX symbols. */
+#define DBX_NO_XREFS 1
+
+/* Don't split DBX symbols into continuations. */
+#define DBX_CONTIN_LENGTH 0
diff --git a/gcc-4.4.0/gcc/config/i386/chkstk.asm b/gcc-4.4.0/gcc/config/i386/chkstk.asm
new file mode 100644
index 000000000..d607fb96d
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/chkstk.asm
@@ -0,0 +1,148 @@
+/* Stuff needed in libgcc for stack probes.
+ *
+ * Copyright (C) 1996, 1998, 2001, 2003, 2008 Free Software Foundation, Inc.
+ * Written By Steve Chamberlain
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file. (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * As a special exception, if you link this library with files
+ * compiled with GCC to produce an executable, this does not cause
+ * the resulting executable to be covered by the GNU General Public License.
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ */
+
+#ifdef L_chkstk
+
+/* Function prologue calls _alloca to probe the stack when allocating more
+ than CHECK_STACK_LIMIT bytes in one go. Touching the stack at 4K
+ increments is necessary to ensure that the guard pages used
+ by the OS virtual memory manager are allocated in the correct sequence. */
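+
+/* Illustrative trigger (a sketch; use() stands in for any consumer):
+
+ void big (void) { char buf[64 * 1024]; use (buf); }
+
+ A frame this large makes the prologue call the probe code below;
+ without the 4K-stride touches, the stack pointer could skip past an
+ unmapped guard page and the OS would never grow the stack. */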
+
+ .global ___chkstk
+ .global __alloca
+#ifndef __x86_64__
+___chkstk:
+__alloca:
+ pushl %ecx /* save temp */
+ movl %esp, %ecx
+ cmpl $0x1000, %eax /* > 4k? */
+ jb Ldone
+
+ /* 0x1000 - 0x8 = 0xff8.
+ 8 bytes to point past return address and ecx saved on the stack. */
+ subl $0xff8, %esp
+ jmp Lprobe1
+
+Lprobe:
+ subl $0x1000, %esp /* yes, move pointer down 4k */
+Lprobe1:
+ orl $0x0, (%esp) /* probe there */
+ subl $0x1000, %eax /* decrement count */
+ cmpl $0x1000, %eax
+ ja Lprobe /* and do it again */
+ jmp Ldone1
+
+Ldone:
+ /* 8 bytes to point past return address and ecx saved on the stack. */
+ subl $0x8, %eax
+Ldone1:
+ subl %eax, %esp
+ orl $0x0, (%esp) /* less than 4k, just peek here */
+
+ movl %ecx, %eax /* save old stack pointer */
+ movl (%eax), %ecx /* recover saved temp */
+ movl 4(%eax), %eax /* recover return address */
+
+ /* Push the return value back. Doing this instead of just
+ jumping to %eax preserves the cached call-return stack
+ used by most modern processors. */
+ pushl %eax
+ ret
+#else
+/* __alloca is a normal function call, which uses %rcx as the argument,
+ and stack space for the argument has been reserved by the caller. */
+__alloca:
+ movq %rcx, %rax
+ addq $0x7, %rax
+ andq $0xfffffffffffffff8, %rax
+ popq %rcx /* pop return address */
+ popq %r10 /* Pop the reserved stack space. */
+ movq %rsp, %r10 /* get sp */
+ cmpq $0x1000, %rax /* > 4k? */
+ jb Ldone_alloca
+
+Lprobe_alloca:
+ subq $0x1000, %r10 /* yes, move pointer down 4k */
+ movq %r10, %rsp /* update sp before probing */
+ orq $0x0, (%r10) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja Lprobe_alloca /* and do it again */
+
+Ldone_alloca:
+ subq %rax, %r10
+ movq %r10, %rsp /* update sp before probing */
+ orq $0x0, (%r10) /* less than 4k, just peek here */
+ movq %r10, %rax
+ subq $0x8, %r10 /* Reserve argument stack space. */
+ movq %r10, %rsp /* decrement stack */
+
+ /* Push the return value back. Doing this instead of just
+ jumping to %rcx preserves the cached call-return stack
+ used by most modern processors. */
+ pushq %rcx
+ ret
+
+/* ___chkstk is a *special* function call, which uses %rax as the argument.
+ We avoid clobbering the 4 integer argument registers, %rcx, %rdx,
+ %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */
+___chkstk:
+ addq $0x7, %rax /* Make sure stack is on alignment of 8. */
+ andq $0xfffffffffffffff8, %rax
+ popq %r11 /* pop return address */
+ movq %rsp, %r10 /* get sp */
+ cmpq $0x1000, %rax /* > 4k? */
+ jb Ldone
+
+Lprobe:
+ subq $0x1000, %r10 /* yes, move pointer down 4k */
+ movq %r10, %rsp /* update sp before probing */
+ orl $0x0, (%r10) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja Lprobe /* and do it again */
+
+Ldone:
+ subq %rax, %r10
+ movq %r10, %rsp /* update sp before probing */
+ orl $0x0, (%r10) /* less than 4k, just peek here */
+
+ /* Push the return value back. Doing this instead of just
+ jumping to %r11 preserves the cached call-return stack
+ used by most modern processors. */
+ pushq %r11
+ ret
+#endif
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/constraints.md b/gcc-4.4.0/gcc/config/i386/constraints.md
new file mode 100644
index 000000000..134ef61a4
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/constraints.md
@@ -0,0 +1,168 @@
+;; Constraint definitions for IA-32 and x86-64.
+;; Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;; Unused letters:
+;;; B H TU W
+;;; h jk vw z
+
+;; Integer register constraints.
+;; It is not necessary to define 'r' here.
+(define_register_constraint "R" "LEGACY_REGS"
+ "Legacy register---the eight integer registers available on all
+ i386 processors (@code{a}, @code{b}, @code{c}, @code{d},
+ @code{si}, @code{di}, @code{bp}, @code{sp}).")
+
+(define_register_constraint "q" "TARGET_64BIT ? GENERAL_REGS : Q_REGS"
+ "Any register accessible as @code{@var{r}l}. In 32-bit mode, @code{a},
+ @code{b}, @code{c}, and @code{d}; in 64-bit mode, any integer register.")
+
+(define_register_constraint "Q" "Q_REGS"
+ "Any register accessible as @code{@var{r}h}: @code{a}, @code{b},
+ @code{c}, and @code{d}.")
+
+(define_register_constraint "l" "INDEX_REGS"
+ "@internal Any register that can be used as the index in a base+index
+ memory access: that is, any general register except the stack pointer.")
+
+(define_register_constraint "a" "AREG"
+ "The @code{a} register.")
+
+(define_register_constraint "b" "BREG"
+ "The @code{b} register.")
+
+(define_register_constraint "c" "CREG"
+ "The @code{c} register.")
+
+(define_register_constraint "d" "DREG"
+ "The @code{d} register.")
+
+(define_register_constraint "S" "SIREG"
+ "The @code{si} register.")
+
+(define_register_constraint "D" "DIREG"
+ "The @code{di} register.")
+
+(define_register_constraint "A" "AD_REGS"
+ "The @code{a} and @code{d} registers, as a pair (for instructions
+ that return half the result in one and half in the other).")
+
+;; Floating-point register constraints.
+(define_register_constraint "f"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FLOAT_REGS : NO_REGS"
+ "Any 80387 floating-point (stack) register.")
+
+(define_register_constraint "t"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_TOP_REG : NO_REGS"
+ "Top of 80387 floating-point stack (@code{%st(0)}).")
+
+(define_register_constraint "u"
+ "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS"
+ "Second from top of 80387 floating-point stack (@code{%st(1)}).")
+
+;; Vector registers (also used for plain floating point nowadays).
+(define_register_constraint "y" "TARGET_MMX ? MMX_REGS : NO_REGS"
+ "Any MMX register.")
+
+(define_register_constraint "x" "TARGET_SSE ? SSE_REGS : NO_REGS"
+ "Any SSE register.")
+
+;; We use the Y prefix to denote any number of conditional register sets:
+;; z First SSE register.
+;; 2 SSE2 enabled
+;; i SSE2 inter-unit moves enabled
+;; m MMX inter-unit moves enabled
+
+(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
+ "First SSE register (@code{%xmm0}).")
+
+(define_register_constraint "Y2" "TARGET_SSE2 ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 is enabled.")
+
+(define_register_constraint "Yi"
+ "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register, when SSE2 and inter-unit moves are enabled.")
+
+(define_register_constraint "Ym"
+ "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
+ "@internal Any MMX register, when inter-unit moves are enabled.")
+
+;; Integer constant constraints.
+(define_constraint "I"
+ "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 31)")))
+
+(define_constraint "J"
+ "Integer constant in the range 0 @dots{} 63, for 64-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 63)")))
+
+(define_constraint "K"
+ "Signed 8-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -128, 127)")))
+
+(define_constraint "L"
+ "@code{0xFF} or @code{0xFFFF}, for andsi as a zero-extending move."
+ (and (match_code "const_int")
+ (match_test "ival == 0xFF || ival == 0xFFFF")))
+
+(define_constraint "M"
+ "0, 1, 2, or 3 (shifts for the @code{lea} instruction)."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 3)")))
+
+(define_constraint "N"
+ "Unsigned 8-bit integer constant (for @code{in} and @code{out}
+ instructions)."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 255)")))
+
+(define_constraint "O"
+ "@internal Integer constant in the range 0 @dots{} 127, for 128-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 127)")))
+
+;; Floating-point constant constraints.
+;; We allow constants even if TARGET_80387 isn't set, because the
+;; stack register converter may need to load 0.0 into the function
+;; value register (top of stack).
+(define_constraint "G"
+ "Standard 80387 floating point constant."
+ (and (match_code "const_double")
+ (match_test "standard_80387_constant_p (op)")))
+
+;; This can theoretically be any mode's CONST0_RTX.
+(define_constraint "C"
+ "Standard SSE floating point constant."
+ (match_test "standard_sse_constant_p (op)"))
+
+;; Constant-or-symbol-reference constraints.
+
+(define_constraint "e"
+ "32-bit signed integer constant, or a symbolic reference known
+ to fit that range (for immediate operands in sign-extending x86-64
+ instructions)."
+ (match_operand 0 "x86_64_immediate_operand"))
+
+(define_constraint "Z"
+ "32-bit unsigned integer constant, or a symbolic reference known
+ to fit that range (for immediate operands in zero-extending x86-64
+ instructions)."
+ (match_operand 0 "x86_64_zext_immediate_operand"))
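
As an illustrative aside (not part of the file above): these letters are
what operands in the i386 machine description and in user extended asm
are matched against. A minimal sketch of user-level use, assuming C on
an i386 target (the helper name is hypothetical) -- "=a" forces the
output into the a register and "N" accepts the unsigned 8-bit constant
required by the in instruction, per the definitions above:

    /* Read a byte from I/O port 0x64 (hypothetical example).  */
    static inline unsigned char
    read_port_0x64 (void)
    {
      unsigned char value;
      __asm__ __volatile__ ("inb %1, %0" : "=a" (value) : "N" (0x64));
      return value;
    }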
diff --git a/gcc-4.4.0/gcc/config/i386/cpuid.h b/gcc-4.4.0/gcc/config/i386/cpuid.h
new file mode 100644
index 000000000..b5258652e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cpuid.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/* %ecx */
+#define bit_SSE3 (1 << 0)
+#define bit_PCLMUL (1 << 1)
+#define bit_SSSE3 (1 << 9)
+#define bit_FMA (1 << 12)
+#define bit_CMPXCHG16B (1 << 13)
+#define bit_SSE4_1 (1 << 19)
+#define bit_SSE4_2 (1 << 20)
+#define bit_POPCNT (1 << 23)
+#define bit_AES (1 << 25)
+#define bit_XSAVE (1 << 26)
+#define bit_OSXSAVE (1 << 27)
+#define bit_AVX (1 << 28)
+
+/* %edx */
+#define bit_CMPXCHG8B (1 << 8)
+#define bit_CMOV (1 << 15)
+#define bit_MMX (1 << 23)
+#define bit_FXSAVE (1 << 24)
+#define bit_SSE (1 << 25)
+#define bit_SSE2 (1 << 26)
+
+/* Extended Features */
+/* %ecx */
+#define bit_LAHF_LM (1 << 0)
+#define bit_SSE4a (1 << 6)
+#define bit_SSE5 (1 << 11)
+
+/* %edx */
+#define bit_LM (1 << 29)
+#define bit_3DNOWP (1 << 30)
+#define bit_3DNOW (1 << 31)
+
+
+#if defined(__i386__) && defined(__PIC__)
+/* %ebx may be the PIC register. */
+#if __GNUC__ >= 3
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchg{l}\t{%%}ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchg{l}\t{%%}ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#else
+/* Host GCCs older than 3.0 did not support Intel asm syntax
+   or dialect alternatives in i386 code.  */
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("xchgl\t%%ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchgl\t%%ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("xchgl\t%%ebx, %1\n\t" \
+ "cpuid\n\t" \
+ "xchgl\t%%ebx, %1\n\t" \
+ : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
+#else
+#define __cpuid(level, a, b, c, d) \
+ __asm__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level))
+
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
+
+/* Return the highest supported input value for the cpuid instruction.
+   ext can be either 0x0 or 0x80000000 to return the highest supported
+   value for basic or extended cpuid information.  The function returns
+   0 if cpuid is not supported, and otherwise whatever cpuid returns in
+   the eax register.  If the sig pointer is non-null, the first four
+   bytes of the signature (as found in the ebx register) are stored at
+   the location it points to.  */
+
+static __inline unsigned int
+__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
+{
+ unsigned int __eax, __ebx, __ecx, __edx;
+
+#ifndef __x86_64__
+#if __GNUC__ >= 3
+ /* See if we can use cpuid. On AMD64 we always can. */
+ __asm__ ("pushf{l|d}\n\t"
+ "pushf{l|d}\n\t"
+ "pop{l}\t%0\n\t"
+ "mov{l}\t{%0, %1|%1, %0}\n\t"
+ "xor{l}\t{%2, %0|%0, %2}\n\t"
+ "push{l}\t%0\n\t"
+ "popf{l|d}\n\t"
+ "pushf{l|d}\n\t"
+ "pop{l}\t%0\n\t"
+ "popf{l|d}\n\t"
+ : "=&r" (__eax), "=&r" (__ebx)
+ : "i" (0x00200000));
+#else
+/* Host GCCs older than 3.0 did not support Intel asm syntax
+   or dialect alternatives in i386 code.  */
+ __asm__ ("pushfl\n\t"
+ "pushfl\n\t"
+ "popl\t%0\n\t"
+ "movl\t%0, %1\n\t"
+ "xorl\t%2, %0\n\t"
+ "pushl\t%0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl\t%0\n\t"
+ "popfl\n\t"
+ : "=&r" (__eax), "=&r" (__ebx)
+ : "i" (0x00200000));
+#endif
+
+ if (!((__eax ^ __ebx) & 0x00200000))
+ return 0;
+#endif
+
+ /* Host supports cpuid. Return highest supported cpuid input value. */
+ __cpuid (__ext, __eax, __ebx, __ecx, __edx);
+
+ if (__sig)
+ *__sig = __ebx;
+
+ return __eax;
+}
+
+/* Return cpuid data for the requested cpuid level, as found in the
+   returned eax, ebx, ecx and edx registers.  The function checks if
+   cpuid is supported and returns 1 for valid cpuid information or 0
+   for an unsupported cpuid level.  All pointers are required to be
+   non-null.  */
+
+static __inline int
+__get_cpuid (unsigned int __level,
+ unsigned int *__eax, unsigned int *__ebx,
+ unsigned int *__ecx, unsigned int *__edx)
+{
+ unsigned int __ext = __level & 0x80000000;
+
+ if (__get_cpuid_max (__ext, 0) < __level)
+ return 0;
+
+ __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
+ return 1;
+}
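
A minimal sketch of a caller of the two functions above, assuming it is
compiled with this header on the include path (the function name is
hypothetical; the level and feature bit come from the definitions
earlier in the header):

    #include "cpuid.h"

    /* Return nonzero if the host CPU advertises SSE2.  */
    int
    have_sse2 (void)
    {
      unsigned int eax, ebx, ecx, edx;

      if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
        return 0;   /* cpuid not supported, or level 1 unavailable.  */
      return (edx & bit_SSE2) != 0;
    }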
diff --git a/gcc-4.4.0/gcc/config/i386/cross-stdarg.h b/gcc-4.4.0/gcc/config/i386/cross-stdarg.h
new file mode 100644
index 000000000..7139ffa74
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cross-stdarg.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __CROSS_STDARG_H_INCLUDED
+#define __CROSS_STDARG_H_INCLUDED
+
+/* Make sure that for non-x64 targets the cross builtins are defined. */
+#ifndef __x86_64__
+/* Call abi ms_abi. */
+#define __builtin_ms_va_list __builtin_va_list
+#define __builtin_ms_va_copy __builtin_va_copy
+#define __builtin_ms_va_start __builtin_va_start
+#define __builtin_ms_va_end __builtin_va_end
+
+/* Call abi sysv_abi. */
+#define __builtin_sysv_va_list __builtin_va_list
+#define __builtin_sysv_va_copy __builtin_va_copy
+#define __builtin_sysv_va_start __builtin_va_start
+#define __builtin_sysv_va_end __builtin_va_end
+#endif
+
+#define __ms_va_copy(__d,__s) __builtin_ms_va_copy(__d,__s)
+#define __ms_va_start(__v,__l) __builtin_ms_va_start(__v,__l)
+#define __ms_va_arg(__v,__l) __builtin_va_arg(__v,__l)
+#define __ms_va_end(__v) __builtin_ms_va_end(__v)
+
+#define __sysv_va_copy(__d,__s) __builtin_sysv_va_copy(__d,__s)
+#define __sysv_va_start(__v,__l) __builtin_sysv_va_start(__v,__l)
+#define __sysv_va_arg(__v,__l) __builtin_va_arg(__v,__l)
+#define __sysv_va_end(__v) __builtin_sysv_va_end(__v)
+
+#ifndef __GNUC_SYSV_VA_LIST
+#define __GNUC_SYSV_VA_LIST
+ typedef __builtin_sysv_va_list __gnuc_sysv_va_list;
+#endif
+
+#ifndef _SYSV_VA_LIST_DEFINED
+#define _SYSV_VA_LIST_DEFINED
+ typedef __gnuc_sysv_va_list sysv_va_list;
+#endif
+
+#ifndef __GNUC_MS_VA_LIST
+#define __GNUC_MS_VA_LIST
+ typedef __builtin_ms_va_list __gnuc_ms_va_list;
+#endif
+
+#ifndef _MS_VA_LIST_DEFINED
+#define _MS_VA_LIST_DEFINED
+ typedef __gnuc_ms_va_list ms_va_list;
+#endif
+
+#endif /* __CROSS_STDARG_H_INCLUDED */
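
A minimal sketch of how these macros are intended to be used, assuming
an x86-64 target where the ms_abi attribute is available (the function
itself is hypothetical):

    #include <cross-stdarg.h>

    /* Sum COUNT ints passed via the Microsoft varargs convention.  */
    __attribute__ ((ms_abi)) int
    sum_ints (int count, ...)
    {
      ms_va_list ap;
      int i, total = 0;

      __ms_va_start (ap, count);
      for (i = 0; i < count; i++)
        total += __ms_va_arg (ap, int);
      __ms_va_end (ap);
      return total;
    }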
diff --git a/gcc-4.4.0/gcc/config/i386/crtdll.h b/gcc-4.4.0/gcc/config/i386/crtdll.h
new file mode 100644
index 000000000..1e5cefd62
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/crtdll.h
@@ -0,0 +1,42 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using GNU tools and the Windows32 API Library.
+   This variant uses CRTDLL.DLL instead of MSVCRT.DLL.
+ Copyright (C) 1998, 1999, 2000, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef EXTRA_OS_CPP_BUILTINS
+#define EXTRA_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__CRTDLL__"); \
+ builtin_define ("__MINGW32__"); \
+ builtin_define ("_WIN32"); \
+ builtin_define_std ("WIN32"); \
+ builtin_define_std ("WINNT"); \
+ } \
+ while (0)
+
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC \
+ "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lcoldname -libmingwex -lcrtdll"
+
+/* Specify a different entry point when linking a DLL */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{shared|mdll:dllcrt1%O%s} \
+ %{!shared:%{!mdll:crt1%O%s}} %{pg:gcrt1%O%s}"
+
diff --git a/gcc-4.4.0/gcc/config/i386/crtfastmath.c b/gcc-4.4.0/gcc/config/i386/crtfastmath.c
new file mode 100644
index 000000000..1c1ce2c78
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/crtfastmath.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2005, 2007, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#define MXCSR_DAZ (1 << 6) /* Enable denormals are zero mode */
+#define MXCSR_FTZ (1 << 15) /* Enable flush to zero mode */
+
+#ifndef __x86_64__
+/* All 64-bit targets have SSE and DAZ;
+ only check them explicitly for 32-bit ones. */
+#include "cpuid.h"
+#endif
+
+static void __attribute__((constructor))
+#ifndef __x86_64__
+/* The i386 ABI only requires 4-byte stack alignment, so this is necessary
+ to make sure the fxsave struct gets correct alignment.
+ See PR27537 and PR28621. */
+__attribute__ ((force_align_arg_pointer))
+#endif
+set_fast_math (void)
+{
+#ifndef __x86_64__
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+ return;
+
+ if (edx & bit_SSE)
+ {
+ unsigned int mxcsr = __builtin_ia32_stmxcsr ();
+
+ mxcsr |= MXCSR_FTZ;
+
+ if (edx & bit_FXSAVE)
+ {
+ /* Check if DAZ is available. */
+ struct
+ {
+ unsigned short int cwd;
+ unsigned short int swd;
+ unsigned short int twd;
+ unsigned short int fop;
+ long int fip;
+ long int fcs;
+ long int foo;
+ long int fos;
+ long int mxcsr;
+ long int mxcsr_mask;
+ long int st_space[32];
+ long int xmm_space[32];
+ long int padding[56];
+ } __attribute__ ((aligned (16))) fxsave;
+
+ __builtin_memset (&fxsave, 0, sizeof (fxsave));
+
+ asm volatile ("fxsave %0" : "=m" (fxsave) : "m" (fxsave));
+
+ if (fxsave.mxcsr_mask & MXCSR_DAZ)
+ mxcsr |= MXCSR_DAZ;
+ }
+
+ __builtin_ia32_ldmxcsr (mxcsr);
+ }
+#else
+ unsigned int mxcsr = __builtin_ia32_stmxcsr ();
+ mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
+ __builtin_ia32_ldmxcsr (mxcsr);
+#endif
+}
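
An illustrative way to observe the effect (assuming this object is
linked in, as the driver arranges for -ffast-math, and that the
arithmetic is done in SSE, i.e. x86-64 or -mfpmath=sse): a denormal
result flushes to zero.

    #include <stdio.h>
    #include <float.h>

    int
    main (void)
    {
      volatile float tiny = FLT_MIN;   /* Smallest normal float.  */
      volatile float r = tiny * 0.5f;  /* Denormal without FTZ.  */
      printf ("%g\n", (double) r);     /* Prints 0 when FTZ is on.  */
      return 0;
    }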
diff --git a/gcc-4.4.0/gcc/config/i386/crtprec.c b/gcc-4.4.0/gcc/config/i386/crtprec.c
new file mode 100644
index 000000000..4f42a8fa1
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/crtprec.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#if __PREC == 32
+ #define X87CW (0 << 8) /* Single precision (24 bits) */
+#elif __PREC == 64
+ #define X87CW (2 << 8) /* Double precision (53 bits) */
+#elif __PREC == 80
+ #define X87CW (3 << 8) /* Extended precision (64 bits) */
+#else
+ #error "Wrong precision requested."
+#endif
+
+#define X87CW_PCMASK (3 << 8)
+
+static void __attribute__((constructor))
+set_precision (void)
+{
+ unsigned short int cwd;
+
+ asm volatile ("fstcw\t%0" : "=m" (cwd));
+
+ cwd &= ~X87CW_PCMASK;
+ cwd |= X87CW;
+
+ asm volatile ("fldcw\t%0" : : "m" (cwd));
+}
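
This file is built once per precision (with __PREC defined to 32, 64 or
80) to produce the crtprec32.o, crtprec64.o and crtprec80.o objects the
driver links for -mpc32/-mpc64/-mpc80. The same control-word update can
be done from user code; a minimal sketch mirroring set_precision above
for the 53-bit case (the helper name is hypothetical):

    /* Force 53-bit (double) x87 precision, as with __PREC == 64.  */
    static void
    force_pc53 (void)
    {
      unsigned short int cwd;

      asm volatile ("fstcw\t%0" : "=m" (cwd));
      cwd = (cwd & ~(3 << 8)) | (2 << 8);  /* PC field = 10b.  */
      asm volatile ("fldcw\t%0" : : "m" (cwd));
    }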
diff --git a/gcc-4.4.0/gcc/config/i386/cygming-crtbegin.c b/gcc-4.4.0/gcc/config/i386/cygming-crtbegin.c
new file mode 100755
index 000000000..367a4bbff
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygming-crtbegin.c
@@ -0,0 +1,135 @@
+/* crtbegin object for windows32 targets.
+ Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ Contributed by Danny Smith <dannysmith@users.sourceforge.net>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Target machine header files require this define. */
+#define IN_LIBGCC2
+
+#include "auto-host.h"
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "unwind-dw2-fde.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#ifndef LIBGCC_SONAME
+#define LIBGCC_SONAME "libgcc_s.dll"
+#endif
+
+#ifndef LIBGCJ_SONAME
+#define LIBGCJ_SONAME "libgcj_s.dll"
+#endif
+
+
+/* Make the declarations weak. This is critical for
+   _Jv_RegisterClasses because it lives in libgcj.a.  */
+extern void __register_frame_info (const void *, struct object *)
+ TARGET_ATTRIBUTE_WEAK;
+extern void *__deregister_frame_info (const void *)
+ TARGET_ATTRIBUTE_WEAK;
+extern void _Jv_RegisterClasses (const void *) TARGET_ATTRIBUTE_WEAK;
+
+#if defined(HAVE_LD_RO_RW_SECTION_MIXING)
+# define EH_FRAME_SECTION_CONST const
+#else
+# define EH_FRAME_SECTION_CONST
+#endif
+
+/* Stick a label at the beginning of the frame unwind info so we can
+ register/deregister it with the exception handling library code. */
+#if DWARF2_UNWIND_INFO
+static EH_FRAME_SECTION_CONST char __EH_FRAME_BEGIN__[]
+ __attribute__((section(EH_FRAME_SECTION_NAME), aligned(4)))
+ = { };
+
+static struct object obj;
+#endif
+
+#if TARGET_USE_JCR_SECTION
+static void *__JCR_LIST__[]
+ __attribute__ ((unused, section(JCR_SECTION_NAME), aligned(4)))
+ = { };
+#endif
+
+/* Pull in references from libgcc.a(unwind-dw2-fde.o) in the
+ startfile. These are referenced by a ctor and dtor in crtend.o. */
+extern void __gcc_register_frame (void);
+extern void __gcc_deregister_frame (void);
+
+void
+__gcc_register_frame (void)
+{
+#if DWARF2_UNWIND_INFO
+/* Weak undefined symbols won't be pulled in from dlls; hence
+ we first test if the dll is already loaded and, if so,
+ get the symbol's address at run-time. If the dll is not loaded,
+   fall back to weak linkage to the static archive.  */
+
+ void (*register_frame_fn) (const void *, struct object *);
+ HANDLE h = GetModuleHandle (LIBGCC_SONAME);
+ if (h)
+ register_frame_fn = (void (*) (const void *, struct object *))
+ GetProcAddress (h, "__register_frame_info");
+ else
+ register_frame_fn = __register_frame_info;
+ if (register_frame_fn)
+ register_frame_fn (__EH_FRAME_BEGIN__, &obj);
+#endif
+
+#if TARGET_USE_JCR_SECTION
+ if (__JCR_LIST__[0])
+ {
+ void (*register_class_fn) (const void *);
+ HANDLE h = GetModuleHandle (LIBGCJ_SONAME);
+ if (h)
+ register_class_fn = (void (*) (const void *))
+ GetProcAddress (h, "_Jv_RegisterClasses");
+ else
+ register_class_fn = _Jv_RegisterClasses;
+
+ if (register_class_fn)
+ register_class_fn (__JCR_LIST__);
+ }
+#endif
+}
+
+void
+__gcc_deregister_frame (void)
+{
+#if DWARF2_UNWIND_INFO
+ void * (*deregister_frame_fn) (const void *);
+ HANDLE h = GetModuleHandle (LIBGCC_SONAME);
+ if (h)
+ deregister_frame_fn = (void* (*) (const void *))
+ GetProcAddress (h, "__deregister_frame_info");
+ else
+ deregister_frame_fn = __deregister_frame_info;
+ if (deregister_frame_fn)
+ deregister_frame_fn (__EH_FRAME_BEGIN__);
+#endif
+}
diff --git a/gcc-4.4.0/gcc/config/i386/cygming-crtend.c b/gcc-4.4.0/gcc/config/i386/cygming-crtend.c
new file mode 100755
index 000000000..8c853bfa7
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygming-crtend.c
@@ -0,0 +1,88 @@
+/* crtend object for windows32 targets.
+ Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ Contributed by Danny Smith <dannysmith@users.sourceforge.net>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Target machine header files require this define. */
+#define IN_LIBGCC2
+
+/* auto-host.h is needed by cygming.h for HAVE_GAS_WEAK and here
+ for HAVE_LD_RO_RW_SECTION_MIXING. */
+#include "auto-host.h"
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "unwind-dw2-fde.h"
+
+#if defined(HAVE_LD_RO_RW_SECTION_MIXING)
+# define EH_FRAME_SECTION_CONST const
+#else
+# define EH_FRAME_SECTION_CONST
+#endif
+
+#if DWARF2_UNWIND_INFO
+/* Terminate the frame unwind info section with a 0 as a sentinel;
+ this would be the 'length' field in a real FDE. */
+
+static EH_FRAME_SECTION_CONST int __FRAME_END__[]
+ __attribute__ ((unused, section(EH_FRAME_SECTION_NAME),
+ aligned(4)))
+ = { 0 };
+#endif
+
+#if TARGET_USE_JCR_SECTION
+/* Null terminate the .jcr section array. */
+static void *__JCR_END__[1]
+ __attribute__ ((unused, section(JCR_SECTION_NAME),
+ aligned(sizeof(void *))))
+ = { 0 };
+#endif
+
+extern void __gcc_register_frame (void);
+extern void __gcc_deregister_frame (void);
+
+static void register_frame_ctor (void) __attribute__ ((constructor (0)));
+
+static void
+register_frame_ctor (void)
+{
+ __gcc_register_frame ();
+#if DEFAULT_USE_CXA_ATEXIT
+ /* If we use the __cxa_atexit method to register C++ dtors
+ at object construction, also use atexit to register eh frame
+ info cleanup. */
+ atexit (__gcc_deregister_frame);
+#endif
+}
+
+#if !DEFAULT_USE_CXA_ATEXIT
+static void deregister_frame_dtor (void) __attribute__ ((destructor (0)));
+
+static void
+deregister_frame_dtor (void)
+{
+ __gcc_deregister_frame ();
+}
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/cygming.h b/gcc-4.4.0/gcc/config/i386/cygming.h
new file mode 100644
index 000000000..db43ffda0
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygming.h
@@ -0,0 +1,404 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using a Unix style C library and tools.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2005, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define DBX_DEBUGGING_INFO 1
+#define SDB_DEBUGGING_INFO 1
+#if TARGET_64BIT_DEFAULT || defined (HAVE_GAS_PE_SECREL32_RELOC)
+#define DWARF2_DEBUGGING_INFO 1
+#endif
+
+#undef PREFERRED_DEBUGGING_TYPE
+#if (DWARF2_DEBUGGING_INFO)
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#else
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+#endif
+
+#undef TARGET_64BIT_MS_ABI
+#define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+
+#undef DEFAULT_ABI
+#define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI)
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : (write_symbols == DWARF2_DEBUG \
+ ? svr4_dbx_register_map[n] : dbx_register_map[n]))
+
+/* Map gcc register number to DWARF 2 CFA column number. For 32 bit
+ target, always use the svr4_dbx_register_map for DWARF .eh_frame
+ even if we don't use DWARF .debug_frame. */
+#undef DWARF_FRAME_REGNUM
+#define DWARF_FRAME_REGNUM(n) TARGET_64BIT \
+ ? dbx64_register_map[(n)] : svr4_dbx_register_map[(n)]
+
+#ifdef HAVE_GAS_PE_SECREL32_RELOC
+/* Use section-relative relocations for debugging offsets.  Other
+   targets fake this by putting the section VMA at 0; PE won't
+   allow that.  */
+#define ASM_OUTPUT_DWARF_OFFSET(FILE, SIZE, LABEL, SECTION) \
+ do { \
+ if (SIZE != 4 && (!TARGET_64BIT || SIZE != 8)) \
+ abort (); \
+ \
+ fputs ("\t.secrel32\t", FILE); \
+ assemble_name (FILE, LABEL); \
+ } while (0)
+#endif
+
+#define TARGET_EXECUTABLE_SUFFIX ".exe"
+
+#include <stdio.h>
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("_X86_=1"); \
+ builtin_assert ("system=winnt"); \
+ builtin_define ("__stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("__fastcall=__attribute__((__fastcall__))"); \
+ builtin_define ("__cdecl=__attribute__((__cdecl__))"); \
+ if (!flag_iso) \
+ { \
+ builtin_define ("_stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("_fastcall=__attribute__((__fastcall__))"); \
+ builtin_define ("_cdecl=__attribute__((__cdecl__))"); \
+ } \
+ /* Even though linkonce works with static libs, this is needed \
+ to compare typeinfo symbols across dll boundaries. */ \
+ builtin_define ("__GXX_MERGED_TYPEINFO_NAMES=0"); \
+ builtin_define ("__GXX_TYPEINFO_EQUALITY_INLINE=0"); \
+ EXTRA_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+/* Get tree.c to declare a target-specific specialization of
+ merge_decl_attributes. */
+#define TARGET_DLLIMPORT_DECL_ATTRIBUTES 1
+
+/* This macro defines names of additional specifications to put in the specs
+ that can be used in various specifications like CC1_SPEC. Its definition
+ is an initializer with a subgrouping for each command option.
+
+   Each subgrouping contains a string constant that defines the
+   specification name, and a string constant that is used by the GCC
+   driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "mingw_include_path", DEFAULT_TARGET_MACHINE }
+
+#undef MATH_LIBRARY
+#define MATH_LIBRARY ""
+
+#define SIZE_TYPE (TARGET_64BIT ? "long long unsigned int" : "unsigned int")
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long long int" : "int")
+
+#define WCHAR_TYPE_SIZE 16
+#define WCHAR_TYPE "short unsigned int"
+
+/* Windows64 continues to use a 32-bit long type. */
+#undef LONG_TYPE_SIZE
+#define LONG_TYPE_SIZE 32
+
+/* Enable parsing of #pragma pack(push,<n>) and #pragma pack(pop). */
+#define HANDLE_PRAGMA_PACK_PUSH_POP 1
+/* Enable push_macro & pop_macro */
+#define HANDLE_PRAGMA_PUSH_POP_MACRO 1
+
+union tree_node;
+#define TREE union tree_node *
+
+#define drectve_section() \
+ (fprintf (asm_out_file, "\t.section .drectve\n"), \
+ in_section = NULL)
+
+/* Older versions of gas don't handle 'r' as data.
+ Explicitly set data flag with 'd'. */
+#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata,\"dr\""
+
+/* Don't allow flag_pic to propagate since gas may produce invalid code
+ otherwise. */
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (flag_pic) \
+ { \
+ warning (0, "-f%s ignored for target (all code is position independent)",\
+ (flag_pic > 1) ? "PIC" : "pic"); \
+ flag_pic = 0; \
+ } \
+} while (0) \
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in).
+
+   On i386 running Windows NT, modify the assembler name with a suffix
+   consisting of an atsign (@) followed by a string of digits that
+   represents the number of bytes of arguments passed to the function,
+   if it has the STDCALL attribute.
+
+ In addition, we must mark dll symbols specially. Definitions of
+ dllexport'd objects install some info in the .drectve section.
+ References to dllimport'd objects are fetched indirectly via
+ _imp__. If both are declared, dllexport overrides. This is also
+ needed to implement one-only vtables: they go into their own
+ section and we need to set DECL_SECTION_NAME so we do that here.
+ Note that we can be called twice on the same decl. */
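+
+/* As an illustration of the decoration just described (not upstream
+   text): a declaration such as
+       int __attribute__ ((stdcall)) f (int a, int b);
+   is emitted as the assembler symbol _f@8, since the two int
+   arguments occupy 8 bytes.  */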
+
+#define SUBTARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info
+
+/* Output a common block. */
+#undef ASM_OUTPUT_ALIGNED_DECL_COMMON
+#define ASM_OUTPUT_ALIGNED_DECL_COMMON \
+ i386_pe_asm_output_aligned_decl_common
+
+/* Output the label for an initialized variable. */
+#undef ASM_DECLARE_OBJECT_NAME
+#define ASM_DECLARE_OBJECT_NAME(STREAM, NAME, DECL) \
+do { \
+ i386_pe_maybe_record_exported_symbol (DECL, NAME, 1); \
+ ASM_OUTPUT_LABEL ((STREAM), (NAME)); \
+} while (0)
+
+/* Output a reference to a label. Fastcall function symbols
+ keep their '@' prefix, while other symbols are prefixed
+ with user_label_prefix. */
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+do { \
+ if ((NAME)[0] != FASTCALL_PREFIX) \
+ fputs (user_label_prefix, (STREAM)); \
+ fputs ((NAME), (STREAM)); \
+} while (0)
+
+
+/* Emit code to check the stack when allocating more than 4000
+ bytes in one go. */
+#define CHECK_STACK_LIMIT 4000
+
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+   returns float values in the 387, and needs stack probes.
+   We also align doubles to 64 bits for MSVC default compatibility.  */
+
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS \
+ | MASK_STACK_PROBE | MASK_ALIGN_DOUBLE)
+
+#undef TARGET_SUBTARGET64_DEFAULT
+#define TARGET_SUBTARGET64_DEFAULT \
+ MASK_128BIT_LONG_DOUBLE
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", 1<<(LOG))
+
+/* Windows uses explicit import from shared libraries. */
+#define MULTIPLE_SYMBOL_SPACES 1
+
+#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+
+#define SUPPORTS_ONE_ONLY 1
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION i386_pe_asm_named_section
+
+/* Select attributes for named sections. */
+#define TARGET_SECTION_TYPE_FLAGS i386_pe_section_type_flags
+
+/* Write the extra assembler code needed to declare a function
+ properly. If we are generating SDB debugging information, this
+ will happen automatically, so we only need to handle other cases. */
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
+ do \
+ { \
+ i386_pe_maybe_record_exported_symbol (DECL, NAME, 0); \
+ if (write_symbols != SDB_DEBUG) \
+ i386_pe_declare_function_type (FILE, NAME, TREE_PUBLIC (DECL)); \
+ ASM_OUTPUT_LABEL (FILE, NAME); \
+ } \
+ while (0)
+
+/* Add an external function to the list of functions to be declared at
+ the end of the file. */
+#define ASM_OUTPUT_EXTERNAL(FILE, DECL, NAME) \
+ do \
+ { \
+ if (TREE_CODE (DECL) == FUNCTION_DECL) \
+ i386_pe_record_external_function ((DECL), (NAME)); \
+ } \
+ while (0)
+
+/* Declare the type properly for any external libcall. */
+#define ASM_OUTPUT_EXTERNAL_LIBCALL(FILE, FUN) \
+ i386_pe_declare_function_type (FILE, XSTR (FUN, 0), 1)
+
+/* This says how to put a global symbol in the BSS section.  */
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+/* Output function declarations at the end of the file. */
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END i386_pe_file_end
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START " #"
+
+#ifndef DWARF2_UNWIND_INFO
+/* If configured with --disable-sjlj-exceptions, use DWARF2, else
+ default to SJLJ. */
+#if (defined (CONFIG_SJLJ_EXCEPTIONS) && !CONFIG_SJLJ_EXCEPTIONS)
+/* The logic of this #if must be kept synchronised with the logic
+ for selecting the tmake_eh_file fragment in config.gcc. */
+#define DWARF2_UNWIND_INFO 1
+#else
+#define DWARF2_UNWIND_INFO 0
+#endif
+#endif
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+#undef PROFILE_HOOK
+#define PROFILE_HOOK(LABEL) \
+ if (MAIN_NAME_P (DECL_NAME (current_function_decl))) \
+ { \
+ emit_call_insn (gen_rtx_CALL (VOIDmode, \
+ gen_rtx_MEM (FUNCTION_MODE, \
+ gen_rtx_SYMBOL_REF (Pmode, "_monstartup")), \
+ const0_rtx)); \
+ }
+
+/* Java Native Interface (JNI) methods on Win32 are invoked using the
+ stdcall calling convention. */
+#undef MODIFY_JNI_METHOD_CALL
+#define MODIFY_JNI_METHOD_CALL(MDECL) \
+ build_type_attribute_variant ((MDECL), \
+ build_tree_list (get_identifier ("stdcall"), \
+ NULL))
+
+/* For Win32 ABI compatibility */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* MSVC returns aggregate types of up to 8 bytes via registers.
+ See i386.c:ix86_return_in_memory. */
+#undef MS_AGGREGATE_RETURN
+#define MS_AGGREGATE_RETURN 1
+
+/* Biggest alignment supported by the object file format of this
+ machine. Use this macro to limit the alignment which can be
+ specified using the `__attribute__ ((aligned (N)))' construct. If
+ not defined, the default value is `BIGGEST_ALIGNMENT'. */
+/* IMAGE_SCN_ALIGN_8192BYTES is the largest section alignment flag
+ specified in the PECOFF60 spec. Native MS compiler also limits
+ user-specified alignment to 8192 bytes. */
+#undef MAX_OFILE_ALIGNMENT
+#define MAX_OFILE_ALIGNMENT (8192 * 8)
+
+/* The BIGGEST_FIELD_ALIGNMENT macro is used directly by libobjc.  There,
+   we align internal doubles in structures on dword boundaries.  Otherwise,
+   vector modes are supported via ADJUST_FIELD_ALIGN, defined in i386.h. */
+#ifdef IN_TARGET_LIBS
+#undef BIGGEST_FIELD_ALIGNMENT
+#define BIGGEST_FIELD_ALIGNMENT 64
+#endif
+
+/* A bit-field declared as `int' forces `int' alignment for the struct. */
+#undef PCC_BITFIELD_TYPE_MATTERS
+#define PCC_BITFIELD_TYPE_MATTERS 1
+#define GROUP_BITFIELDS_BY_ALIGN TYPE_NATIVE(rec)
+
+/* Enable alias attribute support. */
+#ifndef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+#endif
+
+/* This implements the `alias' attribute, keeping any stdcall or
+ fastcall decoration. */
+#undef ASM_OUTPUT_DEF_FROM_DECLS
+#define ASM_OUTPUT_DEF_FROM_DECLS(STREAM, DECL, TARGET) \
+ do \
+ { \
+ const char *alias \
+ = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \
+ if (TREE_CODE (DECL) == FUNCTION_DECL) \
+ i386_pe_declare_function_type (STREAM, alias, \
+ TREE_PUBLIC (DECL)); \
+ ASM_OUTPUT_DEF (STREAM, alias, IDENTIFIER_POINTER (TARGET)); \
+ } while (0)
+
+/* GNU as supports weak symbols on PECOFF. */
+#ifdef HAVE_GAS_WEAK
+#define ASM_WEAKEN_LABEL(FILE, NAME) \
+ do \
+ { \
+ fputs ("\t.weak\t", (FILE)); \
+ assemble_name ((FILE), (NAME)); \
+ fputc ('\n', (FILE)); \
+ } \
+ while (0)
+#endif /* HAVE_GAS_WEAK */
+
+/* FIXME: SUPPORTS_WEAK && TARGET_HAVE_NAMED_SECTIONS is true,
+ but for .jcr section to work we also need crtbegin and crtend
+ objects. */
+#define TARGET_USE_JCR_SECTION 0
+
+/* Decide whether it is safe to use a local alias for a virtual function
+ when constructing thunks. */
+#undef TARGET_USE_LOCAL_THUNK_ALIAS_P
+#define TARGET_USE_LOCAL_THUNK_ALIAS_P(DECL) (!DECL_ONE_ONLY (DECL))
+
+#define SUBTARGET_ATTRIBUTE_TABLE \
+ { "selectany", 0, 0, true, false, false, ix86_handle_selectany_attribute }
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+
+/* mcount() does not need a counter variable. */
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+#define TARGET_VALID_DLLIMPORT_ATTRIBUTE_P i386_pe_valid_dllimport_attribute_p
+#define TARGET_CXX_ADJUST_CLASS_AT_DEFINITION i386_pe_adjust_class_at_definition
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_pe_mangle_decl_assembler_name
+
+#undef TREE
+
+#ifndef BUFSIZ
+# undef FILE
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/cygming.opt b/gcc-4.4.0/gcc/config/i386/cygming.opt
new file mode 100644
index 000000000..7c29eb89b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygming.opt
@@ -0,0 +1,47 @@
+; Cygwin- and MinGW-specific options.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+mconsole
+Target RejectNegative
+Create console application
+
+mcygwin
+Target
+Use the Cygwin interface
+
+mdll
+Target RejectNegative
+Generate code for a DLL
+
+mnop-fun-dllimport
+Target Report Var(TARGET_NOP_FUN_DLLIMPORT)
+Ignore dllimport for functions
+
+mthreads
+Target RejectNegative
+Use Mingw-specific thread support
+
+mwin32
+Target
+Set Windows defines
+
+mwindows
+Target
+Create GUI application
diff --git a/gcc-4.4.0/gcc/config/i386/cygwin.asm b/gcc-4.4.0/gcc/config/i386/cygwin.asm
new file mode 100644
index 000000000..588c12ee7
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygwin.asm
@@ -0,0 +1,126 @@
+/* Stuff needed for libgcc on win32.
+ *
+ * Copyright (C) 1996, 1998, 2001, 2003, 2008, 2009 Free Software Foundation, Inc.
+ * Written By Steve Chamberlain
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef L_chkstk
+
+/* The function prologue calls _alloca to probe the stack when allocating
+   more than CHECK_STACK_LIMIT bytes in one go.  Touching the stack at 4K
+   increments is necessary to ensure that the guard pages used by the OS
+   virtual memory manager are allocated in the correct sequence.  */
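+
+/* For illustration (not upstream text): prologues that allocate a
+   large frame reach this code roughly as
+       movl $FRAMESIZE, %eax        (FRAMESIZE hypothetical, > 4096)
+       call ___chkstk
+   so that each 4K page is touched in order, as described above.  */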
+
+ .global ___chkstk
+ .global __alloca
+#ifndef _WIN64
+___chkstk:
+__alloca:
+ pushl %ecx /* save temp */
+ leal 8(%esp), %ecx /* point past return addr */
+	cmpl	$0x1000, %eax		/* > 4k? */
+	jb	Ldone
+
+Lprobe:
+	subl	$0x1000, %ecx		/* yes, move pointer down 4k */
+ orl $0x0, (%ecx) /* probe there */
+ subl $0x1000, %eax /* decrement count */
+ cmpl $0x1000, %eax
+ ja Lprobe /* and do it again */
+
+Ldone:
+ subl %eax, %ecx
+ orl $0x0, (%ecx) /* less than 4k, just peek here */
+
+ movl %esp, %eax /* save old stack pointer */
+ movl %ecx, %esp /* decrement stack */
+ movl (%eax), %ecx /* recover saved temp */
+ movl 4(%eax), %eax /* recover return address */
+
+	/* Push the return address back.  Doing this instead of just
+	   jumping to %eax preserves the cached call-return stack
+	   used by most modern processors.  */
+ pushl %eax
+ ret
+#else
+/* __alloca is a normal function call, which takes its argument in %rcx;
+   the caller has also reserved stack space for the argument.  */
+__alloca:
+ movq %rcx, %rax
+ addq $0x7, %rax
+ andq $0xfffffffffffffff8, %rax
+ popq %rcx /* pop return address */
+ popq %r10 /* Pop the reserved stack space. */
+ movq %rsp, %r10 /* get sp */
+	cmpq	$0x1000, %rax		/* > 4k? */
+	jb	Ldone_alloca
+
+Lprobe_alloca:
+	subq	$0x1000, %r10		/* yes, move pointer down 4k */
+ orq $0x0, (%r10) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja Lprobe_alloca /* and do it again */
+
+Ldone_alloca:
+ subq %rax, %r10
+ orq $0x0, (%r10) /* less than 4k, just peek here */
+ movq %r10, %rax
+ subq $0x8, %r10 /* Reserve argument stack space. */
+ movq %r10, %rsp /* decrement stack */
+
+	/* Push the return address back.  Doing this instead of just
+	   jumping to %rcx preserves the cached call-return stack
+	   used by most modern processors.  */
+ pushq %rcx
+ ret
+
+/* ___chkstk is a *special* function call, which uses %rax as the argument.
+ We avoid clobbering the 4 integer argument registers, %rcx, %rdx,
+ %r8 and %r9, which leaves us with %rax, %r10, and %r11 to use. */
+___chkstk:
+ addq $0x7, %rax /* Make sure stack is on alignment of 8. */
+ andq $0xfffffffffffffff8, %rax
+ popq %r11 /* pop return address */
+ movq %rsp, %r10 /* get sp */
+	cmpq	$0x1000, %rax		/* > 4k? */
+	jb	Ldone
+
+Lprobe:
+	subq	$0x1000, %r10		/* yes, move pointer down 4k */
+ orl $0x0, (%r10) /* probe there */
+ subq $0x1000, %rax /* decrement count */
+ cmpq $0x1000, %rax
+ ja Lprobe /* and do it again */
+
+Ldone:
+ subq %rax, %r10
+ orl $0x0, (%r10) /* less than 4k, just peek here */
+ movq %r10, %rsp /* decrement stack */
+
+	/* Push the return address back.  Doing this instead of just
+	   jumping to %r11 preserves the cached call-return stack
+	   used by most modern processors.  */
+ pushq %r11
+ ret
+#endif
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/cygwin.h b/gcc-4.4.0/gcc/config/i386/cygwin.h
new file mode 100644
index 000000000..02d059f76
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygwin.h
@@ -0,0 +1,269 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using a Unix style C library and tools.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_VERSION fprintf (stderr, " (x86 Cygwin)");
+
+#define EXTRA_OS_CPP_BUILTINS() /* Nothing. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(cpp_cpu) %{posix:-D_POSIX_SOURCE} \
+ %{mno-win32:%{mno-cygwin: %emno-cygwin and mno-win32 are not compatible}} \
+ %{mno-cygwin:-D__MSVCRT__ -D__MINGW32__ %{!ansi:%{mthreads:-D_MT}}}\
+ %{!mno-cygwin:-D__CYGWIN32__ -D__CYGWIN__ %{!ansi:-Dunix} -D__unix__ -D__unix }\
+ %{mwin32|mno-cygwin:-DWIN32 -D_WIN32 -D__WIN32 -D__WIN32__ %{!ansi:-DWINNT}}\
+ %{!nostdinc:%{!mno-win32|mno-cygwin:-idirafter ../include/w32api%s -idirafter ../../include/w32api%s}}\
+"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+ %{shared|mdll: %{mno-cygwin:dllcrt2%O%s}}\
+ %{!shared: %{!mdll: %{!mno-cygwin:crt0%O%s} %{mno-cygwin:crt2%O%s}\
+ %{pg:gcrt0%O%s}}}\
+ crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\
+ crtend.o%s"
+
+/* Normally, -lgcc is not needed since everything in it is in the DLL, but we
+ want to allow things to be added to it when installing new versions of
+ GCC without making a new CYGWIN.DLL, so we leave it. Profiling is handled
+ by calling the init function from main. */
+
+#ifdef ENABLE_SHARED_LIBGCC
+#define SHARED_LIBGCC_SPEC " \
+ %{static|static-libgcc:-lgcc -lgcc_eh} \
+ %{!static: \
+ %{!static-libgcc: \
+ %{!shared: \
+ %{!shared-libgcc:-lgcc -lgcc_eh} \
+ %{shared-libgcc:-lgcc_s -lgcc} \
+ } \
+ %{shared:-lgcc_s -lgcc} \
+ } \
+ } "
+#else
+#define SHARED_LIBGCC_SPEC " -lgcc "
+#endif
+
+#undef REAL_LIBGCC_SPEC
+#define REAL_LIBGCC_SPEC \
+ "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} \
+ " SHARED_LIBGCC_SPEC " \
+ %{mno-cygwin:-lmoldname -lmingwex -lmsvcrt}"
+
+/* We have to link dynamically to get to the system DLLs.  All of libc,
+   libm and the Unix stuff is in cygwin.dll.  The import library is called
+   'libcygwin.a'.  For Windows applications, include more libraries, but
+   always include kernel32.  We'd like to specify the windows subsystem to
+   ld, but that doesn't work just yet.  */
+
+#undef LIB_SPEC
+#define LIB_SPEC "\
+ %{pg:-lgmon} \
+ %{!mno-cygwin:-lcygwin} \
+ %{mno-cygwin:%{mthreads:-lmingwthrd} -lmingw32} \
+ %{mwindows:-lgdi32 -lcomdlg32} \
+ -luser32 -lkernel32 -ladvapi32 -lshell32"
+
+#define LINK_SPEC "\
+ %{mwindows:--subsystem windows} \
+ %{mconsole:--subsystem console} \
+ %{shared: %{mdll: %eshared and mdll are not compatible}} \
+ %{shared: --shared} %{mdll:--dll} \
+ %{static:-Bstatic} %{!static:-Bdynamic} \
+ %{shared|mdll: -e \
+ %{mno-cygwin:_DllMainCRTStartup@12} \
+ %{!mno-cygwin:__cygwin_dll_entry@12}}\
+ %{!mno-cygwin:--dll-search-prefix=cyg}"
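+
+/* Illustrative expansion of the spec above (not upstream text):
+   "gcc -mdll -mno-cygwin foo.o" passes
+   "--dll -Bdynamic -e _DllMainCRTStartup@12" to ld, while a plain
+   "gcc foo.o" passes "-Bdynamic --dll-search-prefix=cyg".  */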
+
+/* Allocate space for all of the machine-spec-specific stuff.
+ Allocate enough space for cygwin -> mingw32 munging plus
+ possible addition of "/mingw". */
+
+#ifndef CYGWIN_MINGW_SUBDIR
+#define CYGWIN_MINGW_SUBDIR "/mingw"
+#endif
+#define CYGWIN_MINGW_SUBDIR_LEN (sizeof (CYGWIN_MINGW_SUBDIR) - 1)
+
+#ifdef GPLUSPLUS_INCLUDE_DIR
+char cygwin_gplusplus_include_dir[sizeof (GPLUSPLUS_INCLUDE_DIR) + 1
+ + (CYGWIN_MINGW_SUBDIR_LEN)]
+ = GPLUSPLUS_INCLUDE_DIR;
+#undef GPLUSPLUS_INCLUDE_DIR
+#define GPLUSPLUS_INCLUDE_DIR ((const char *) cygwin_gplusplus_include_dir)
+#ifndef GEN_CVT_ARRAY
+#define GEN_CVT_ARRAY
+#endif
+#endif
+
+#ifdef GPLUSPLUS_TOOL_INCLUDE_DIR
+char cygwin_gplusplus_tool_include_dir[sizeof (GPLUSPLUS_TOOL_INCLUDE_DIR) + 1
+ + CYGWIN_MINGW_SUBDIR_LEN]
+ = GPLUSPLUS_TOOL_INCLUDE_DIR;
+#undef GPLUSPLUS_TOOL_INCLUDE_DIR
+#define GPLUSPLUS_TOOL_INCLUDE_DIR ((const char *) cygwin_gplusplus_tool_include_dir)
+#ifndef GEN_CVT_ARRAY
+#define GEN_CVT_ARRAY
+#endif
+#endif
+
+#ifdef GPLUSPLUS_BACKWARD_INCLUDE_DIR
+char cygwin_gplusplus_backward_include_dir[sizeof (GPLUSPLUS_BACKWARD_INCLUDE_DIR) + 1
+ + CYGWIN_MINGW_SUBDIR_LEN]
+ = GPLUSPLUS_BACKWARD_INCLUDE_DIR;
+#undef GPLUSPLUS_BACKWARD_INCLUDE_DIR
+#define GPLUSPLUS_BACKWARD_INCLUDE_DIR ((const char *) cygwin_gplusplus_backward_include_dir)
+#ifndef GEN_CVT_ARRAY
+#define GEN_CVT_ARRAY
+#endif
+#endif
+
+#ifdef LOCAL_INCLUDE_DIR
+char cygwin_local_include_dir[sizeof (LOCAL_INCLUDE_DIR) + 1
+ + CYGWIN_MINGW_SUBDIR_LEN]
+ = LOCAL_INCLUDE_DIR;
+#undef LOCAL_INCLUDE_DIR
+#define LOCAL_INCLUDE_DIR ((const char *) cygwin_local_include_dir)
+#ifndef GEN_CVT_ARRAY
+#define GEN_CVT_ARRAY
+#endif
+#endif
+
+#ifdef CROSS_INCLUDE_DIR
+char cygwin_cross_include_dir[sizeof (CROSS_INCLUDE_DIR) + 1
+ + CYGWIN_MINGW_SUBDIR_LEN]
+ = CROSS_INCLUDE_DIR;
+#undef CROSS_INCLUDE_DIR
+#define CROSS_INCLUDE_DIR ((const char *) cygwin_cross_include_dir)
+#ifndef GEN_CVT_ARRAY
+#define GEN_CVT_ARRAY
+#endif
+#endif
+
+#ifdef TOOL_INCLUDE_DIR
+char cygwin_tool_include_dir[sizeof (TOOL_INCLUDE_DIR) + 1
+ + CYGWIN_MINGW_SUBDIR_LEN]
+ = TOOL_INCLUDE_DIR;
+#undef TOOL_INCLUDE_DIR
+#define TOOL_INCLUDE_DIR ((const char *) cygwin_tool_include_dir)
+
+#ifndef CROSS_DIRECTORY_STRUCTURE
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR "/usr/include"
+char cygwin_standard_include_dir[sizeof (STANDARD_INCLUDE_DIR) + 1
+ + CYGWIN_MINGW_SUBDIR_LEN]
+ = STANDARD_INCLUDE_DIR;
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR ((const char *) cygwin_standard_include_dir)
+#endif
+
+#ifndef GEN_CVT_ARRAY
+#define GEN_CVT_ARRAY
+#endif
+#endif
+
+#ifndef GEN_CVT_ARRAY
+extern char *cvt_to_mingw[];
+#else
+char *cvt_to_mingw[] =
+ {
+#ifdef GPLUSPLUS_INCLUDE_DIR
+ cygwin_gplusplus_include_dir,
+#endif
+
+#ifdef GPLUSPLUS_TOOL_INCLUDE_DIR
+ cygwin_gplusplus_tool_include_dir,
+#endif
+
+#ifdef GPLUSPLUS_BACKWARD_INCLUDE_DIR
+ cygwin_gplusplus_backward_include_dir,
+#endif
+
+#ifdef LOCAL_INCLUDE_DIR
+ cygwin_local_include_dir,
+#endif
+
+#ifdef CROSS_INCLUDE_DIR
+ cygwin_cross_include_dir,
+#endif
+
+#ifdef TOOL_INCLUDE_DIR
+ cygwin_tool_include_dir,
+#endif
+
+#ifdef STANDARD_INCLUDE_DIR
+ cygwin_standard_include_dir,
+#endif
+
+ NULL
+ };
+#undef GEN_CVT_ARRAY
+#endif /*GEN_CVT_ARRAY*/
+
+void mingw_scan (int, const char * const *, const char **);
+#if 1
+#define GCC_DRIVER_HOST_INITIALIZATION \
+do \
+{ \
+ mingw_scan(argc, (const char * const *) argv, &spec_machine); \
+ } \
+while (0)
+#else
+#define GCC_DRIVER_HOST_INITIALIZATION \
+do \
+{ \
+ char *cprefix = concat (tooldir_base_prefix, spec_machine, \
+ dir_separator_str, NULL); \
+ if (!IS_ABSOLUTE_PATH (cprefix)) \
+ cprefix = concat (standard_exec_prefix, spec_machine, dir_separator_str, \
+ spec_version, dir_separator_str, tooldir_prefix, NULL); \
+ add_prefix (&exec_prefixes,\
+ concat (cprefix, "../../../../", spec_machine, "/bin/", NULL), \
+ "BINUTILS", PREFIX_PRIORITY_LAST, 0, NULL); \
+ add_prefix (&exec_prefixes, cprefix, \
+ "BINUTILS", PREFIX_PRIORITY_LAST, 0, NULL); \
+ add_prefix (&startfile_prefixes,\
+ concat (standard_startfile_prefix, "w32api", NULL),\
+ "GCC", PREFIX_PRIORITY_LAST, 0, NULL);\
+ mingw_scan(argc, (const char * const *) argv, &spec_machine); \
+ } \
+while (0)
+#endif
+
+/* Binutils does not handle weak symbols from dlls correctly. For now,
+ do not use them unnecessarily in gthr-posix.h. */
+#define GTHREAD_USE_WEAK 0
+
+/* Every program on cygwin links against cygwin1.dll which contains
+ the pthread routines. There is no need to explicitly link them
+ and the -pthread flag is not recognized. */
+#undef GOMP_SELF_SPECS
+#define GOMP_SELF_SPECS ""
+
+/* This matches SHLIB_SONAME and SHLIB_SOVERSION in t-cygwin. */
+#if DWARF2_UNWIND_INFO
+#define LIBGCC_EH_EXTN ""
+#else
+#define LIBGCC_EH_EXTN "-sjlj"
+#endif
+#define LIBGCC_SONAME "cyggcc_s" LIBGCC_EH_EXTN "-1.dll"
diff --git a/gcc-4.4.0/gcc/config/i386/cygwin1.c b/gcc-4.4.0/gcc/config/i386/cygwin1.c
new file mode 100644
index 000000000..7de34d24b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygwin1.c
@@ -0,0 +1,53 @@
+/* Helper routines for cygwin-specific command-line parsing.
+ Contributed by Christopher Faylor (cgf@redhat.com)
+ Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include <string.h>
+
+void
+mingw_scan (int argc ATTRIBUTE_UNUSED,
+ const char *const *argv,
+ const char **spec_machine)
+{
+ putenv (xstrdup ("GCC_CYGWIN_MINGW=0"));
+
+ while (*++argv)
+ if (strcmp (*argv, "-mno-win32") == 0)
+ putenv (xstrdup ("GCC_CYGWIN_WIN32=0"));
+ else if (strcmp (*argv, "-mwin32") == 0)
+ putenv (xstrdup ("GCC_CYGWIN_WIN32=1"));
+ else if (strcmp (*argv, "-mno-cygwin") == 0)
+ {
+ char *p = strstr (*spec_machine, "-cygwin");
+ if (p)
+ {
+ int len = p - *spec_machine;
+ char *s = XNEWVEC (char, strlen (*spec_machine) + 3);
+ memcpy (s, *spec_machine, len);
+ strcpy (s + len, "-mingw32");
+ *spec_machine = s;
+ }
+ putenv (xstrdup ("GCC_CYGWIN_MINGW=1"));
+ }
+ return;
+}
diff --git a/gcc-4.4.0/gcc/config/i386/cygwin2.c b/gcc-4.4.0/gcc/config/i386/cygwin2.c
new file mode 100644
index 000000000..c48e2a52c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/cygwin2.c
@@ -0,0 +1,66 @@
+/* Helper routines for cygwin-specific command-line parsing.
+ Contributed by Christopher Faylor (cgf@redhat.com)
+ Copyright 2003, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+
+#include "safe-ctype.h"
+#include <string.h>
+
+/*
+static void remove_w32api (void);
+*/
+static void add_mingw (void);
+static void set_mingw (void) __attribute__ ((constructor));
+
+static void
+add_mingw (void)
+{
+ char **av;
+ char *p;
+ for (av = cvt_to_mingw; *av; av++)
+ {
+ int sawcygwin = 0;
+ while ((p = strstr (*av, "-cygwin")))
+ {
+ char *over = p + sizeof ("-cygwin") - 1;
+ memmove (over + 1, over, strlen (over));
+ memcpy (p, "-mingw32", sizeof("-mingw32") - 1);
+ p = ++over;
+ while (ISALNUM (*p))
+ p++;
+ strcpy (over, p);
+ sawcygwin = 1;
+ }
+ if (!sawcygwin && !strstr (*av, "mingw"))
+ strcat (*av, CYGWIN_MINGW_SUBDIR);
+ }
+}
+
+
+static void
+set_mingw (void)
+{
+ char *env = getenv ("GCC_CYGWIN_MINGW");
+ if (env && *env == '1')
+ add_mingw ();
+}
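
add_mingw edits its spec strings in place, relying on slack space in
the cvt_to_mingw buffers.  A minimal out-of-place sketch of the same
transformation, in which every "-cygwin" plus any alphanumeric version
suffix becomes "-mingw32" (rewrite_cygwin is a hypothetical name):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return a malloc'd copy of SRC with each "-cygwin<alnum...>" run
   replaced by "-mingw32".  */
static char *
rewrite_cygwin (const char *src)
{
  /* Each 7-byte "-cygwin" grows by at most one byte, so doubling the
     buffer is more than enough.  */
  char *out = malloc (2 * strlen (src) + 1);
  const char *p = src;
  char *q = out;

  if (!out)
    return NULL;
  while (*p)
    {
      if (strncmp (p, "-cygwin", 7) == 0)
	{
	  memcpy (q, "-mingw32", 8);
	  q += 8;
	  p += 7;
	  while (isalnum ((unsigned char) *p))  /* drop version suffix */
	    p++;
	}
      else
	*q++ = *p++;
    }
  *q = '\0';
  return out;
}

int
main (void)
{
  char *s = rewrite_cygwin ("/lib/gcc/i686-pc-cygwin/4.4.0/include");
  printf ("%s\n", s);  /* /lib/gcc/i686-pc-mingw32/4.4.0/include */
  free (s);
  return 0;
}
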
diff --git a/gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.4.ver b/gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.4.ver
new file mode 100644
index 000000000..aaeb934fe
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.4.ver
@@ -0,0 +1,81 @@
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divxc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunsxfdi
+___fixunsxfsi
+___fixxfdi
+___floatdidf
+___floatdisf
+___floatdixf
+___gcc_personality_v0
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___mulvdi3
+___mulvsi3
+___mulxc3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powixf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.5.ver b/gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.5.ver
new file mode 100644
index 000000000..02a085843
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/darwin-libgcc.10.5.ver
@@ -0,0 +1,85 @@
+__Unwind_Backtrace
+__Unwind_DeleteException
+__Unwind_FindEnclosingFunction
+__Unwind_Find_FDE
+__Unwind_ForcedUnwind
+__Unwind_GetCFA
+__Unwind_GetDataRelBase
+__Unwind_GetGR
+__Unwind_GetIP
+__Unwind_GetIPInfo
+__Unwind_GetLanguageSpecificData
+__Unwind_GetRegionStart
+__Unwind_GetTextRelBase
+__Unwind_RaiseException
+__Unwind_Resume
+__Unwind_Resume_or_Rethrow
+__Unwind_SetGR
+__Unwind_SetIP
+___absvdi2
+___absvsi2
+___addvdi3
+___addvsi3
+___ashldi3
+___ashrdi3
+___clear_cache
+___clzdi2
+___clzsi2
+___cmpdi2
+___ctzdi2
+___ctzsi2
+___deregister_frame
+___deregister_frame_info
+___deregister_frame_info_bases
+___divdc3
+___divdi3
+___divsc3
+___divxc3
+___enable_execute_stack
+___ffsdi2
+___fixdfdi
+___fixsfdi
+___fixunsdfdi
+___fixunsdfsi
+___fixunssfdi
+___fixunssfsi
+___fixunsxfdi
+___fixunsxfsi
+___fixxfdi
+___floatdidf
+___floatdisf
+___floatdixf
+___floatundidf
+___floatundisf
+___floatundixf
+___gcc_personality_v0
+___lshrdi3
+___moddi3
+___muldc3
+___muldi3
+___mulsc3
+___mulvdi3
+___mulvsi3
+___mulxc3
+___negdi2
+___negvdi2
+___negvsi2
+___paritydi2
+___paritysi2
+___popcountdi2
+___popcountsi2
+___powidf2
+___powisf2
+___powixf2
+___register_frame
+___register_frame_info
+___register_frame_info_bases
+___register_frame_info_table
+___register_frame_info_table_bases
+___register_frame_table
+___subvdi3
+___subvsi3
+___ucmpdi2
+___udivdi3
+___udivmoddi4
+___umoddi3
diff --git a/gcc-4.4.0/gcc/config/i386/darwin.h b/gcc-4.4.0/gcc/config/i386/darwin.h
new file mode 100644
index 000000000..039f52f77
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/darwin.h
@@ -0,0 +1,302 @@
+/* Target definitions for x86 running Darwin.
+ Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Enable Mach-O bits in generic x86 code. */
+#undef TARGET_MACHO
+#define TARGET_MACHO 1
+
+#define TARGET_VERSION fprintf (stderr, " (i686 Darwin)");
+
+#undef TARGET_64BIT
+#define TARGET_64BIT OPTION_ISA_64BIT
+
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+/* Size of the Obj-C jump buffer. */
+#define OBJC_JBLEN ((TARGET_64BIT) ? ((9 * 2) + 3 + 16) : (18))
+
+#undef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT (TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ darwin_cpp_builtins (pfile); \
+ } \
+ while (0)
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef MAX_BITS_PER_WORD
+#define MAX_BITS_PER_WORD 64
+
+#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
+
+#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
+#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
+
+/* On Darwin, the stack is 128-bit aligned at the point of every call.
+ Failure to ensure this will lead to a crash in the system libraries
+ or dynamic loader. */
+#undef STACK_BOUNDARY
+#define STACK_BOUNDARY 128
+
+#undef MAIN_STACK_BOUNDARY
+#define MAIN_STACK_BOUNDARY 128
+
+/* Since we'll never want a stack boundary less aligned than 128 bits,
+   we need the extra work here, otherwise bits of gcc get very grumpy
+   when we ask for lower alignment.  We could just reject values less
+   than 128 bits for Darwin, but it's easier to up the alignment if
+   it's below the minimum.  */
+#undef PREFERRED_STACK_BOUNDARY
+#define PREFERRED_STACK_BOUNDARY \
+ MAX (STACK_BOUNDARY, ix86_preferred_stack_boundary)
+
+/* We want -fPIC by default, unless we're using -static to compile for
+ the kernel or some such. */
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) \
+ %{!mkernel:%{!static:%{!mdynamic-no-pic:-fPIC}}} \
+ %{!mmacosx-version-min=*:-mmacosx-version-min=%(darwin_minversion)} \
+ %{g: %{!fno-eliminate-unused-debug-symbols: -feliminate-unused-debug-symbols }}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "-arch %(darwin_arch) -force_cpusubtype_ALL"
+
+#define DARWIN_ARCH_SPEC "%{m64:x86_64;:i386}"
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+/* Determine a minimum version based on compiler options. */
+#define DARWIN_MINVERSION_SPEC \
+ "%{!m64|fgnu-runtime:10.4; \
+ ,objective-c|,objc-cpp-output:10.5; \
+ ,objective-c-header:10.5; \
+ ,objective-c++|,objective-c++-cpp-output:10.5; \
+ ,objective-c++-header|,objc++-cpp-output:10.5; \
+ :10.4}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mpc32:crtprec32.o%s} \
+ %{mpc64:crtprec64.o%s} \
+ %{mpc80:crtprec80.o%s}"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", "" }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
+
+/* Use the following macro for any Darwin/x86-specific command-line option
+ translation. */
+#define SUBTARGET_OPTION_TRANSLATE_TABLE \
+ { "", "" }
+
+/* The Darwin assembler mostly follows AT&T syntax. */
+#undef ASSEMBLER_DIALECT
+#define ASSEMBLER_DIALECT ASM_ATT
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't. This macro controls what to do: by default, don't
+ print %cl. */
+
+#define SHIFT_DOUBLE_OMITS_COUNT 0
+
+extern void darwin_x86_file_end (void);
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END darwin_x86_file_end
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* String containing the assembler's comment-starter. */
+
+#define ASM_COMMENT_START "#"
+
+/* By default, target has a 80387, uses IEEE compatible arithmetic,
+ and returns float values in the 387. */
+
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_128BIT_LONG_DOUBLE)
+
+/* For darwin we want to require certain processor features as a
+   minimum, but these unfortunately don't correspond to any specific
+   processor.  */
+#undef TARGET_SUBTARGET32_ISA_DEFAULT
+#define TARGET_SUBTARGET32_ISA_DEFAULT (OPTION_MASK_ISA_MMX \
+ | OPTION_MASK_ISA_SSE \
+ | OPTION_MASK_ISA_SSE2)
+
+#undef TARGET_SUBTARGET64_ISA_DEFAULT
+#define TARGET_SUBTARGET64_ISA_DEFAULT (OPTION_MASK_ISA_MMX \
+ | OPTION_MASK_ISA_SSE \
+ | OPTION_MASK_ISA_SSE2 \
+ | OPTION_MASK_ISA_SSE3)
+
+/* For now, disable dynamic-no-pic. We'll need to go through i386.c
+ with a fine-tooth comb looking for refs to flag_pic! */
+#define MASK_MACHO_DYNAMIC_NO_PIC 0
+#define TARGET_DYNAMIC_NO_PIC (target_flags & MASK_MACHO_DYNAMIC_NO_PIC)
+
+#undef GOT_SYMBOL_NAME
+#define GOT_SYMBOL_NAME MACHOPIC_FUNCTION_BASE_NAME
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+#define LPREFIX "L"
+
+/* These are used by -fbranch-probabilities */
+#define HOT_TEXT_SECTION_NAME "__TEXT,__text,regular,pure_instructions"
+#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME \
+ "__TEXT,__unlikely,regular,pure_instructions"
+
+/* Assembler pseudos to introduce constants of various size. */
+
+#define ASM_BYTE_OP "\t.byte\t"
+#define ASM_SHORT "\t.word\t"
+#define ASM_LONG "\t.long\t"
+#define ASM_QUAD "\t.quad\t"
+
+#define SUBTARGET_ENCODE_SECTION_INFO darwin_encode_section_info
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ do { if ((LOG) != 0) \
+ { \
+ if (in_section == text_section) \
+ fprintf (FILE, "\t%s %d,0x90\n", ALIGN_ASM_OP, (LOG)); \
+ else \
+ fprintf (FILE, "\t%s %d\n", ALIGN_ASM_OP, (LOG)); \
+ } \
+ } while (0)
+
+/* This says how to output an assembler line
+ to define a global common symbol. */
+
+#define ASM_OUTPUT_COMMON(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".comm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (ROUNDED)))
+
+/* This says how to output an assembler line
+ to define a local common symbol. */
+
+#define ASM_OUTPUT_LOCAL(FILE, NAME, SIZE, ROUNDED) \
+( fputs (".lcomm ", (FILE)), \
+ assemble_name ((FILE), (NAME)), \
+ fprintf ((FILE), ","HOST_WIDE_INT_PRINT_UNSIGNED"\n", (ROUNDED)))
+
+/* Darwin profiling -- call mcount. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ do { \
+ if (MACHOPIC_INDIRECT && !TARGET_64BIT) \
+ { \
+ const char *name = machopic_mcount_stub_name (); \
+ fprintf (FILE, "\tcall %s\n", name+1); /* skip '&' */ \
+ machopic_validate_stub_or_non_lazy_ptr (name); \
+ } \
+ else fprintf (FILE, "\tcall mcount\n"); \
+ } while (0)
+
+#define C_COMMON_OVERRIDE_OPTIONS \
+ do { \
+ SUBTARGET_C_COMMON_OVERRIDE_OPTIONS; \
+ } while (0)
+
+/* Darwin on x86_64 uses dwarf-2 by default.  Pre-darwin9 32-bit
+   compiles default to stabs+; darwin9 and later default to dwarf-2.  */
+#ifndef DARWIN_PREFER_DWARF
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE (TARGET_64BIT ? DWARF2_DEBUG : DBX_DEBUG)
+#endif
+
+/* Darwin uses the standard DWARF register numbers but the default
+ register numbers for STABS. Fortunately for 64-bit code the
+ default and the standard are the same. */
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : write_symbols == DWARF2_DEBUG ? svr4_dbx_register_map[n] \
+ : dbx_register_map[n])
+
+/* Unfortunately, the 32-bit EH information also doesn't use the standard
+ DWARF register numbers. */
+#define DWARF2_FRAME_REG_OUT(n, for_eh) \
+ (! (for_eh) || write_symbols != DWARF2_DEBUG || TARGET_64BIT ? (n) \
+ : (n) == 5 ? 4 \
+ : (n) == 4 ? 5 \
+ : (n) >= 11 && (n) <= 18 ? (n) + 1 \
+ : (n))
+
+#undef REGISTER_SUBTARGET_PRAGMAS
+#define REGISTER_SUBTARGET_PRAGMAS() DARWIN_REGISTER_TARGET_PRAGMAS()
+
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES darwin_set_default_type_attributes
+
+/* For 64-bit, we need to add 4 because @GOTPCREL is relative to the
+ end of the instruction, but without the 4 we'd only have the right
+ address for the start of the instruction. */
+#undef ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ if (TARGET_64BIT) \
+ { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_pcrel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs ("+4@GOTPCREL", FILE); \
+ goto DONE; \
+ } \
+ } \
+ else \
+ { \
+ if (ENCODING == ASM_PREFERRED_EH_DATA_FORMAT (2, 1)) \
+ { \
+ darwin_non_lazy_pcrel (FILE, ADDR); \
+ goto DONE; \
+ } \
+ }
+
+/* This needs to move since i386 uses the first flag and other flags are
+ used in Mach-O. */
+#undef MACHO_SYMBOL_FLAG_VARIABLE
+#define MACHO_SYMBOL_FLAG_VARIABLE ((SYMBOL_FLAG_MACH_DEP) << 3)
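
The DWARF2_FRAME_REG_OUT remapping above is easier to read as a plain
function.  A sketch of the 32-bit EH case only (for_eh set, DWARF 2
debug info in use, not 64-bit); darwin_eh_regno is a hypothetical
name:

#include <stdio.h>

/* Mirror the conditional chain in DWARF2_FRAME_REG_OUT: registers 4
   and 5 are swapped and 11..18 are shifted up by one; everything else
   passes through.  */
static int
darwin_eh_regno (int n)
{
  if (n == 5)
    return 4;
  if (n == 4)
    return 5;
  if (n >= 11 && n <= 18)
    return n + 1;
  return n;
}

int
main (void)
{
  int n;

  for (n = 3; n <= 19; n++)
    printf ("%2d -> %2d\n", n, darwin_eh_regno (n));
  return 0;
}
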
diff --git a/gcc-4.4.0/gcc/config/i386/darwin64.h b/gcc-4.4.0/gcc/config/i386/darwin64.h
new file mode 100644
index 000000000..9562faa90
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/darwin64.h
@@ -0,0 +1,35 @@
+/* Target definitions for x86_64 running Darwin.
+ Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+ Contributed by Apple Computer Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (x86_64 Darwin)");
+
+#undef DARWIN_ARCH_SPEC
+#define DARWIN_ARCH_SPEC "%{m32:i386;:x86_64}"
+
+#undef DARWIN_SUBARCH_SPEC
+#define DARWIN_SUBARCH_SPEC DARWIN_ARCH_SPEC
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ DARWIN_EXTRA_SPECS \
+ { "darwin_arch", DARWIN_ARCH_SPEC }, \
+ { "darwin_crt2", "" }, \
+ { "darwin_subarch", DARWIN_SUBARCH_SPEC },
diff --git a/gcc-4.4.0/gcc/config/i386/djgpp.h b/gcc-4.4.0/gcc/config/i386/djgpp.h
new file mode 100644
index 000000000..26a35af1e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/djgpp.h
@@ -0,0 +1,187 @@
+/* Configuration for an i386 running MS-DOS with DJGPP.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005,
+ 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Support generation of DWARF2 debugging info. */
+#define DWARF2_DEBUGGING_INFO 1
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+#define HANDLE_SYSV_PRAGMA 1
+
+/* Enable parsing of #pragma pack(push,<n>) and #pragma pack(pop). */
+#define HANDLE_PRAGMA_PACK_PUSH_POP 1
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* Define the name of the .data section. */
+#undef DATA_SECTION_ASM_OP
+#define DATA_SECTION_ASM_OP "\t.section .data"
+
+/* Define the name of the .ident op. */
+#undef IDENT_ASM_OP
+#define IDENT_ASM_OP "\t.ident\t"
+
+/* Enable alias attribute support. */
+#ifndef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+#endif
+
+/* Define the name of the .text section. */
+#undef TEXT_SECTION_ASM_OP
+#define TEXT_SECTION_ASM_OP "\t.section .text"
+
+/* Define standard DJGPP installation paths. */
+/* We override the default /usr or /usr/local part with /dev/env/DJDIR, */
+/* which points to the actual DJGPP installation directory. */
+
+/* Standard include directory */
+#undef STANDARD_INCLUDE_DIR
+#define STANDARD_INCLUDE_DIR "/dev/env/DJDIR/include/"
+
+/* Search for as.exe and ld.exe in DJGPP's binary directory. */
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/dev/env/DJDIR/bin/"
+
+/* Standard DJGPP library and startup files */
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/dev/env/DJDIR/lib/"
+
+/* Correctly handle absolute filename detection in cp/xref.c */
+#define FILE_NAME_ABSOLUTE_P(NAME) \
+ (((NAME)[0] == '/') || ((NAME)[0] == '\\') || \
+ (((NAME)[0] >= 'A') && ((NAME)[0] <= 'z') && ((NAME)[1] == ':')))
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("MSDOS"); \
+ builtin_define_std ("GO32"); \
+ builtin_assert ("system=msdos"); \
+ } \
+ while (0)
+
+/* Include <sys/version.h> so __DJGPP__ and __DJGPP_MINOR__ are defined. */
+#undef CPP_SPEC
+#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \
+ -imacros %s../include/sys/version.h"
+
+/* We need to override link_command_spec in gcc.c to support -Tdjgpp.djl.
+   This cannot be done in LINK_SPEC, as LINK_SPEC is processed
+   before library search directories are known by the linker.
+   This avoids problems when the specs file is not available.  An
+   alternative, suggested by Robert Hoehne, is to use
+   SUBTARGET_EXTRA_SPECS instead.  */
+
+#undef LINK_COMMAND_SPEC
+#define LINK_COMMAND_SPEC \
+"%{!fsyntax-only: \
+%{!c:%{!M:%{!MM:%{!E:%{!S:%(linker) %l %X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} \
+\t%{r} %{s} %{t} %{u*} %{x} %{z} %{Z}\
+\t%{!A:%{!nostdlib:%{!nostartfiles:%S}}}\
+\t%{static:} %{L*} %D %o\
+\t%{!nostdlib:%{!nodefaultlibs:%G %L %G}}\
+\t%{!A:%{!nostdlib:%{!nostartfiles:%E}}}\
+\t-Tdjgpp.djl %{T*}}}}}}}\n\
+%{!c:%{!M:%{!MM:%{!E:%{!S:stubify %{v} %{o*:%*} %{!o*:a.out} }}}}}"
+
+/* Always just link in 'libc.a'. */
+#undef LIB_SPEC
+#define LIB_SPEC "-lc"
+
+/* Pick the right startup code depending on the -pg flag. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{pg:gcrt0.o%s}%{!pg:crt0.o%s}"
+
+/* Make sure that gcc will not look for .h files in /usr/local/include
+ unless user explicitly requests it. */
+#undef LOCAL_INCLUDE_DIR
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) fprintf ((FILE), "\t.p2align %d\n", LOG)
+
+/* This is how to output a global symbol in the BSS section. */
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
+
+/* This is how to tell assembler that a symbol is weak */
+#undef ASM_WEAKEN_LABEL
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+ do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+ fputc ('\n', FILE); } while (0)
+
+/* djgpp automatically calls its own version of __main, so don't define one
+ in libgcc, nor call one in main(). */
+#define HAS_INIT_SECTION
+
+/* Definitions for types and sizes.  Wide characters are 16 bits long
+   so Win32 compiler add-ons will be wide-character compatible.  */
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+/* Used to be defined in xm-djgpp.h, but moved here for cross-compilers. */
+#define LIBSTDCXX "-lstdcxx"
+
+#define TARGET_VERSION fprintf (stderr, " (80386, MS-DOS DJGPP)");
+
+/* Warn that -mbnu210 is now obsolete. */
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do \
+ { \
+ if (TARGET_BNU210) \
+ { \
+ warning (0, "-mbnu210 is ignored (option is obsolete)"); \
+ } \
+ } \
+while (0)
+
+/* Support for C++ templates. */
+#undef MAKE_DECL_ONE_ONLY
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
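
FILE_NAME_ABSOLUTE_P above accepts Unix and DOS separators plus
drive-letter prefixes.  Note that the 'A'..'z' range is wider than the
ASCII letters: it also admits the six punctuation characters between
'Z' and 'a'.  A small test harness (file_name_absolute_p is a
hypothetical wrapper around the macro's logic):

#include <stdio.h>

static int
file_name_absolute_p (const char *name)
{
  return name[0] == '/' || name[0] == '\\'
	 || (name[0] >= 'A' && name[0] <= 'z' && name[1] == ':');
}

int
main (void)
{
  const char *tests[] = { "/dev/env/DJDIR", "c:\\djgpp", "[:odd", "foo.c" };
  unsigned int i;

  for (i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%-16s %s\n", tests[i],
	    file_name_absolute_p (tests[i]) ? "absolute" : "relative");
  return 0;
}
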
diff --git a/gcc-4.4.0/gcc/config/i386/djgpp.opt b/gcc-4.4.0/gcc/config/i386/djgpp.opt
new file mode 100644
index 000000000..013bdf0bf
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/djgpp.opt
@@ -0,0 +1,25 @@
+; DJGPP-specific options.
+
+; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;; -mbnu210 is now ignored and obsolete.  It was used to enable support
+;; for weak symbols and .gnu.linkonce sections.
+mbnu210
+Target Var(TARGET_BNU210)
+Ignored (obsolete)
diff --git a/gcc-4.4.0/gcc/config/i386/driver-i386.c b/gcc-4.4.0/gcc/config/i386/driver-i386.c
new file mode 100644
index 000000000..69c6c0c7f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/driver-i386.c
@@ -0,0 +1,613 @@
+/* Subroutines for the gcc driver.
+ Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include <stdlib.h>
+
+const char *host_detect_local_cpu (int argc, const char **argv);
+
+#ifdef __GNUC__
+#include "cpuid.h"
+
+struct cache_desc
+{
+ unsigned sizekb;
+ unsigned assoc;
+ unsigned line;
+};
+
+/* Returns command line parameters that describe the size and line
+   size of the processor caches.  */
+
+static char *
+describe_cache (struct cache_desc level1, struct cache_desc level2)
+{
+ char size[100], line[100], size2[100];
+
+ /* At the moment, gcc does not use the information
+ about the associativity of the cache. */
+
+ sprintf (size, "--param l1-cache-size=%u", level1.sizekb);
+ sprintf (line, "--param l1-cache-line-size=%u", level1.line);
+
+ sprintf (size2, "--param l2-cache-size=%u", level2.sizekb);
+
+ return concat (size, " ", line, " ", size2, " ", NULL);
+}
+
+/* Detect L2 cache parameters using CPUID extended function 0x80000006. */
+
+static void
+detect_l2_cache (struct cache_desc *level2)
+{
+ unsigned eax, ebx, ecx, edx;
+ unsigned assoc;
+
+ __cpuid (0x80000006, eax, ebx, ecx, edx);
+
+ level2->sizekb = (ecx >> 16) & 0xffff;
+ level2->line = ecx & 0xff;
+
+ assoc = (ecx >> 12) & 0xf;
+ if (assoc == 6)
+ assoc = 8;
+ else if (assoc == 8)
+ assoc = 16;
+ else if (assoc >= 0xa && assoc <= 0xc)
+ assoc = 32 + (assoc - 0xa) * 16;
+ else if (assoc >= 0xd && assoc <= 0xe)
+ assoc = 96 + (assoc - 0xd) * 32;
+
+ level2->assoc = assoc;
+}
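
The fields decoded above come from the ECX result of CPUID leaf
0x80000006: bits 31:16 hold the L2 size in KB, bits 15:12 an
associativity code, bits 7:0 the line size in bytes.  A sketch with a
synthetic register value rather than a live CPUID:

#include <stdio.h>

int
main (void)
{
  /* 512 KB, associativity code 6 (8-way), 64-byte lines.  */
  unsigned int ecx = (512u << 16) | (0x6u << 12) | 64u;
  unsigned int sizekb = (ecx >> 16) & 0xffff;
  unsigned int line = ecx & 0xff;
  unsigned int assoc = (ecx >> 12) & 0xf;

  if (assoc == 6)
    assoc = 8;   /* code 6 means 8-way */
  else if (assoc == 8)
    assoc = 16;  /* code 8 means 16-way */

  printf ("L2: %u KB, %u-way, %u-byte lines\n", sizekb, assoc, line);
  return 0;
}
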
+
+/* Returns the description of caches for an AMD processor. */
+
+static const char *
+detect_caches_amd (unsigned max_ext_level)
+{
+ unsigned eax, ebx, ecx, edx;
+
+ struct cache_desc level1, level2 = {0, 0, 0};
+
+ if (max_ext_level < 0x80000005)
+ return "";
+
+ __cpuid (0x80000005, eax, ebx, ecx, edx);
+
+ level1.sizekb = (ecx >> 24) & 0xff;
+ level1.assoc = (ecx >> 16) & 0xff;
+ level1.line = ecx & 0xff;
+
+ if (max_ext_level >= 0x80000006)
+ detect_l2_cache (&level2);
+
+ return describe_cache (level1, level2);
+}
+
+/* Decodes the size, the associativity and the cache line size of
+ L1/L2 caches of an Intel processor. Values are based on
+ "Intel Processor Identification and the CPUID Instruction"
+ [Application Note 485], revision -032, December 2007. */
+
+static void
+decode_caches_intel (unsigned reg, bool xeon_mp,
+ struct cache_desc *level1, struct cache_desc *level2)
+{
+ int i;
+
+ for (i = 24; i >= 0; i -= 8)
+ switch ((reg >> i) & 0xff)
+ {
+ case 0x0a:
+ level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
+ break;
+ case 0x0c:
+ level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
+ break;
+ case 0x2c:
+ level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
+ break;
+ case 0x39:
+ level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x3a:
+ level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
+ break;
+ case 0x3b:
+ level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
+ break;
+ case 0x3c:
+ level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x3d:
+ level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
+ break;
+ case 0x3e:
+ level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x41:
+ level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x42:
+ level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x43:
+ level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x44:
+ level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x45:
+ level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
+ break;
+ case 0x49:
+ if (xeon_mp)
+ break;
+ level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
+ break;
+ case 0x4e:
+ level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
+ break;
+ case 0x60:
+ level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
+ break;
+ case 0x66:
+ level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x67:
+ level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x68:
+ level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
+ break;
+ case 0x78:
+ level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x79:
+ level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7a:
+ level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7b:
+ level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7c:
+ level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7d:
+ level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
+ break;
+ case 0x7f:
+ level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
+ break;
+ case 0x82:
+ level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x83:
+ level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x84:
+ level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x85:
+ level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
+ break;
+ case 0x86:
+ level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
+ break;
+ case 0x87:
+	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
+	break;
+
+ default:
+ break;
+ }
+}
+
+/* Detect cache parameters using CPUID function 2. */
+
+static void
+detect_caches_cpuid2 (bool xeon_mp,
+ struct cache_desc *level1, struct cache_desc *level2)
+{
+ unsigned regs[4];
+ int nreps, i;
+
+ __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
+
+ nreps = regs[0] & 0x0f;
+ regs[0] &= ~0x0f;
+
+ while (--nreps >= 0)
+ {
+ for (i = 0; i < 4; i++)
+ if (regs[i] && !((regs[i] >> 31) & 1))
+ decode_caches_intel (regs[i], xeon_mp, level1, level2);
+
+ if (nreps)
+ __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
+ }
+}
+
+/* Detect cache parameters using CPUID function 4. This
+ method doesn't require hardcoded tables. */
+
+enum cache_type
+{
+ CACHE_END = 0,
+ CACHE_DATA = 1,
+ CACHE_INST = 2,
+ CACHE_UNIFIED = 3
+};
+
+static void
+detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2)
+{
+ struct cache_desc *cache;
+
+ unsigned eax, ebx, ecx, edx;
+ int count;
+
+ for (count = 0;; count++)
+ {
+ __cpuid_count(4, count, eax, ebx, ecx, edx);
+ switch (eax & 0x1f)
+ {
+ case CACHE_END:
+ return;
+ case CACHE_DATA:
+ case CACHE_UNIFIED:
+ {
+ switch ((eax >> 5) & 0x07)
+ {
+ case 1:
+ cache = level1;
+ break;
+ case 2:
+ cache = level2;
+ break;
+ default:
+ cache = NULL;
+ }
+
+ if (cache)
+ {
+ unsigned sets = ecx + 1;
+ unsigned part = ((ebx >> 12) & 0x03ff) + 1;
+
+ cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
+ cache->line = (ebx & 0x0fff) + 1;
+
+ cache->sizekb = (cache->assoc * part
+ * cache->line * sets) / 1024;
+ }
+ }
+ default:
+ break;
+ }
+ }
+}
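
The deterministic cache parameters are all stored biased by one, and
the size falls out as ways x partitions x line size x sets.  A sketch
with synthetic register values encoding a 32 KB, 8-way, 64-byte-line
cache (64 sets, 1 partition), instead of a real __cpuid_count result:

#include <stdio.h>

int
main (void)
{
  unsigned int ebx = (7u << 22) | (0u << 12) | 63u;  /* ways-1, parts-1, line-1 */
  unsigned int ecx = 63;                             /* sets - 1 */

  unsigned int sets = ecx + 1;
  unsigned int part = ((ebx >> 12) & 0x03ff) + 1;
  unsigned int assoc = ((ebx >> 22) & 0x03ff) + 1;
  unsigned int line = (ebx & 0x0fff) + 1;
  unsigned int sizekb = (assoc * part * line * sets) / 1024;

  printf ("%u KB, %u-way, %u-byte lines\n", sizekb, assoc, line);  /* 32 KB */
  return 0;
}
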
+
+/* Returns the description of caches for an Intel processor. */
+
+static const char *
+detect_caches_intel (bool xeon_mp, unsigned max_level, unsigned max_ext_level)
+{
+ struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0};
+
+ if (max_level >= 4)
+ detect_caches_cpuid4 (&level1, &level2);
+ else if (max_level >= 2)
+ detect_caches_cpuid2 (xeon_mp, &level1, &level2);
+ else
+ return "";
+
+ if (level1.sizekb == 0)
+ return "";
+
+  /* Intel CPUs also report AMD-style L2 cache info.  Try this
+     method if the other methods fail to provide L2 cache parameters.  */
+ if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
+ detect_l2_cache (&level2);
+
+ return describe_cache (level1, level2);
+}
+
+enum vendor_signatures
+{
+ SIG_INTEL = 0x756e6547 /* Genu */,
+ SIG_AMD = 0x68747541 /* Auth */,
+ SIG_GEODE = 0x646f6547 /* Geod */
+};
+
+/* This will be called by the spec parser in gcc.c when it sees
+ a %:local_cpu_detect(args) construct. Currently it will be called
+   with either "arch" or "tune" as argument depending on whether
+   -march=native or -mtune=native is to be substituted.
+
+   It returns a string containing new command line parameters to be
+   put at the place of the above two options, depending on the CPU
+   this is executed on.  E.g. "-march=k8" on an AMD64 machine
+ for -march=native.
+
+ ARGC and ARGV are set depending on the actual arguments given
+ in the spec. */
+
+const char *host_detect_local_cpu (int argc, const char **argv)
+{
+ enum processor_type processor = PROCESSOR_I386;
+ const char *cpu = "i386";
+
+ const char *cache = "";
+ const char *options = "";
+
+ unsigned int eax, ebx, ecx, edx;
+
+ unsigned int max_level, ext_level;
+
+ unsigned int vendor;
+ unsigned int model, family;
+
+ unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
+ unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
+
+ /* Extended features */
+ unsigned int has_lahf_lm = 0, has_sse4a = 0;
+ unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
+
+ bool arch;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = !strcmp (argv[0], "arch");
+
+ if (!arch && strcmp (argv[0], "tune"))
+ return NULL;
+
+ max_level = __get_cpuid_max (0, &vendor);
+ if (max_level < 1)
+ goto done;
+
+ __cpuid (1, eax, ebx, ecx, edx);
+
+  /* We don't care about the extended family.  */
+ model = (eax >> 4) & 0x0f;
+ family = (eax >> 8) & 0x0f;
+
+ has_sse3 = ecx & bit_SSE3;
+ has_ssse3 = ecx & bit_SSSE3;
+ has_cmpxchg16b = ecx & bit_CMPXCHG16B;
+
+ has_cmpxchg8b = edx & bit_CMPXCHG8B;
+ has_cmov = edx & bit_CMOV;
+ has_mmx = edx & bit_MMX;
+ has_sse = edx & bit_SSE;
+ has_sse2 = edx & bit_SSE2;
+
+ /* Check cpuid level of extended features. */
+ __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+ if (ext_level > 0x80000000)
+ {
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ has_lahf_lm = ecx & bit_LAHF_LM;
+ has_sse4a = ecx & bit_SSE4a;
+
+ has_longmode = edx & bit_LM;
+ has_3dnowp = edx & bit_3DNOWP;
+ has_3dnow = edx & bit_3DNOW;
+ }
+
+ if (!arch)
+ {
+ if (vendor == SIG_AMD)
+ cache = detect_caches_amd (ext_level);
+ else if (vendor == SIG_INTEL)
+ {
+ bool xeon_mp = (family == 15 && model == 6);
+ cache = detect_caches_intel (xeon_mp, max_level, ext_level);
+ }
+ }
+
+ if (vendor == SIG_AMD)
+ {
+ processor = PROCESSOR_PENTIUM;
+
+ if (has_mmx)
+ processor = PROCESSOR_K6;
+ if (has_3dnowp)
+ processor = PROCESSOR_ATHLON;
+ if (has_sse2 || has_longmode)
+ processor = PROCESSOR_K8;
+ if (has_sse4a)
+ processor = PROCESSOR_AMDFAM10;
+ }
+ else if (vendor == SIG_GEODE)
+ processor = PROCESSOR_GEODE;
+ else
+ {
+ switch (family)
+ {
+ case 4:
+ processor = PROCESSOR_I486;
+ break;
+ case 5:
+ processor = PROCESSOR_PENTIUM;
+ break;
+ case 6:
+ processor = PROCESSOR_PENTIUMPRO;
+ break;
+ case 15:
+ processor = PROCESSOR_PENTIUM4;
+ break;
+ default:
+ /* We have no idea. */
+ processor = PROCESSOR_GENERIC32;
+ }
+ }
+
+ switch (processor)
+ {
+ case PROCESSOR_I386:
+ /* Default. */
+ break;
+ case PROCESSOR_I486:
+ cpu = "i486";
+ break;
+ case PROCESSOR_PENTIUM:
+ if (arch && has_mmx)
+ cpu = "pentium-mmx";
+ else
+ cpu = "pentium";
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ if (has_longmode)
+ /* It is Core 2 Duo. */
+ cpu = "core2";
+ else if (arch)
+ {
+ if (has_sse3)
+ /* It is Core Duo. */
+ cpu = "prescott";
+ else if (has_sse2)
+ /* It is Pentium M. */
+ cpu = "pentium-m";
+ else if (has_sse)
+ /* It is Pentium III. */
+ cpu = "pentium3";
+ else if (has_mmx)
+ /* It is Pentium II. */
+ cpu = "pentium2";
+ else
+ /* Default to Pentium Pro. */
+ cpu = "pentiumpro";
+ }
+ else
+ /* For -mtune, we default to -mtune=generic. */
+ cpu = "generic";
+ break;
+ case PROCESSOR_PENTIUM4:
+ if (has_sse3)
+ {
+ if (has_longmode)
+ cpu = "nocona";
+ else
+ cpu = "prescott";
+ }
+ else
+ cpu = "pentium4";
+ break;
+ case PROCESSOR_GEODE:
+ cpu = "geode";
+ break;
+ case PROCESSOR_K6:
+ if (arch && has_3dnow)
+ cpu = "k6-3";
+ else
+ cpu = "k6";
+ break;
+ case PROCESSOR_ATHLON:
+ if (arch && has_sse)
+ cpu = "athlon-4";
+ else
+ cpu = "athlon";
+ break;
+ case PROCESSOR_K8:
+ if (arch && has_sse3)
+ cpu = "k8-sse3";
+ else
+ cpu = "k8";
+ break;
+ case PROCESSOR_AMDFAM10:
+ cpu = "amdfam10";
+ break;
+
+ default:
+ /* Use something reasonable. */
+ if (arch)
+ {
+ if (has_ssse3)
+ cpu = "core2";
+ else if (has_sse3)
+ {
+ if (has_longmode)
+ cpu = "nocona";
+ else
+ cpu = "prescott";
+ }
+ else if (has_sse2)
+ cpu = "pentium4";
+ else if (has_cmov)
+ cpu = "pentiumpro";
+ else if (has_mmx)
+ cpu = "pentium-mmx";
+ else if (has_cmpxchg8b)
+ cpu = "pentium";
+ }
+ else
+ cpu = "generic";
+ }
+
+ if (arch)
+ {
+ if (has_cmpxchg16b)
+ options = concat (options, "-mcx16 ", NULL);
+ if (has_lahf_lm)
+ options = concat (options, "-msahf ", NULL);
+ }
+
+done:
+ return concat (cache, "-m", argv[0], "=", cpu, " ", options, NULL);
+}
+#else
+
+/* If we aren't compiling with GCC, we just provide a minimal
+   default value.  */
+
+const char *host_detect_local_cpu (int argc, const char **argv)
+{
+ const char *cpu;
+ bool arch;
+
+ if (argc < 1)
+ return NULL;
+
+ arch = !strcmp (argv[0], "arch");
+
+ if (!arch && strcmp (argv[0], "tune"))
+ return NULL;
+
+ if (arch)
+ {
+ /* FIXME: i386 is wrong for 64bit compiler. How can we tell if
+ we are generating 64bit or 32bit code? */
+ cpu = "i386";
+ }
+ else
+ cpu = "generic";
+
+ return concat ("-m", argv[0], "=", cpu, NULL);
+}
+#endif /* __GNUC__ */
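
The SIG_* vendor constants are simply the first four bytes of the
CPUID leaf-0 vendor string ("GenuineIntel", "AuthenticAMD",
"Geode by NSC") as they land in EBX.  A one-line check, assuming a
little-endian host:

#include <stdio.h>
#include <string.h>

int
main (void)
{
  unsigned int ebx;

  memcpy (&ebx, "Genu", 4);  /* low byte first on little-endian */
  printf ("0x%08x\n", ebx);  /* 0x756e6547 == SIG_INTEL */
  return 0;
}
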
diff --git a/gcc-4.4.0/gcc/config/i386/emmintrin.h b/gcc-4.4.0/gcc/config/i386/emmintrin.h
new file mode 100644
index 000000000..9467fe0f9
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/emmintrin.h
@@ -0,0 +1,1513 @@
+/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _EMMINTRIN_H_INCLUDED
+#define _EMMINTRIN_H_INCLUDED
+
+#ifndef __SSE2__
+# error "SSE2 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE header files.  */
+#include <xmmintrin.h>
+
+/* SSE2 */
+typedef double __v2df __attribute__ ((__vector_size__ (16)));
+typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef short __v8hi __attribute__ ((__vector_size__ (16)));
+typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Create a selector for use with the SHUFPD instruction. */
+#define _MM_SHUFFLE2(fp1,fp0) \
+ (((fp1) << 1) | (fp0))
+
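
What the selector packs: bit 0 chooses the source element for result
lane 0 and bit 1 for lane 1, so with both operands equal,
_MM_SHUFFLE2 (0,1) evaluates to 1 and selects {src[1], src[0]}, the
swap _mm_loadr_pd uses below.  A quick sketch with a local copy of the
macro:

#include <stdio.h>

#define MM_SHUFFLE2(fp1, fp0) (((fp1) << 1) | (fp0))

int
main (void)
{
  printf ("%d %d %d %d\n",
	  MM_SHUFFLE2 (0, 0), MM_SHUFFLE2 (0, 1),
	  MM_SHUFFLE2 (1, 0), MM_SHUFFLE2 (1, 1));  /* 0 1 2 3 */
  return 0;
}
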
+/* Create a vector with element 0 as F and the rest zero. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_sd (double __F)
+{
+ return __extension__ (__m128d){ __F, 0.0 };
+}
+
+/* Create a vector with both elements equal to F. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pd (double __F)
+{
+ return __extension__ (__m128d){ __F, __F };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pd1 (double __F)
+{
+ return _mm_set1_pd (__F);
+}
+
+/* Create a vector with the lower value X and upper value W. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __X, __W };
+}
+
+/* Create a vector with the lower value W and upper value X. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pd (double __W, double __X)
+{
+ return __extension__ (__m128d){ __W, __X };
+}
+
+/* Create a vector of zeros. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_pd (void)
+{
+ return __extension__ (__m128d){ 0.0, 0.0 };
+}
+
+/* Sets the low DPFP value of A from the low value of B. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+}
+
+/* Load two DPFP values from P. The address must be 16-byte aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_pd (double const *__P)
+{
+ return *(__m128d *)__P;
+}
+
+/* Load two DPFP values from P. The address need not be 16-byte aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_pd (double const *__P)
+{
+ return __builtin_ia32_loadupd (__P);
+}
+
+/* Create a vector with both elements equal to *P.  */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_pd (double const *__P)
+{
+ return _mm_set1_pd (*__P);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_sd (double const *__P)
+{
+ return _mm_set_sd (*__P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_pd1 (double const *__P)
+{
+ return _mm_load1_pd (__P);
+}
+
+/* Load two DPFP values in reverse order. The address must be aligned. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_pd (double const *__P)
+{
+ __m128d __tmp = _mm_load_pd (__P);
+ return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
+}
+
+/* Store two DPFP values. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_pd (double *__P, __m128d __A)
+{
+ *(__m128d *)__P = __A;
+}
+
+/* Store two DPFP values. The address need not be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_pd (double *__P, __m128d __A)
+{
+ __builtin_ia32_storeupd (__P, __A);
+}
+
+/* Stores the lower DPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_sd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_f64 (__m128d __A)
+{
+ return __builtin_ia32_vec_ext_v2df (__A, 0);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pd (double *__P, __m128d __A)
+{
+ _mm_store_sd (__P, __A);
+}
+
+/* Stores the upper DPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pd (double *__P, __m128d __A)
+{
+ *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
+}
+
+/* Store the lower DPFP value into both elements of the destination.
+   The address must be 16-byte aligned.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_pd1 (double *__P, __m128d __A)
+{
+ _mm_store1_pd (__P, __A);
+}
+
+/* Store two DPFP values in reverse order. The address must be aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_pd (double *__P, __m128d __A)
+{
+ _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si32 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si64 (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi128_si64x (__m128i __A)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
+}
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_pd (__m128d __A)
+{
+ return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
+}
+
+/* Return pair {sqrt (B[0]), A[1]}.  */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_sd (__m128d __A, __m128d __B)
+{
+ __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
+ return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
+}
+
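
The two-step sequence above composes movsd and sqrtsd: movsd (A, B)
yields {B[0], A[1]}, and sqrtsd then replaces the low element with its
square root, giving {sqrt (B[0]), A[1]}.  A scalar model of the
result (link with -lm):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double A[2] = { 2.0, 3.0 }, B[2] = { 16.0, 5.0 };
  double r[2] = { sqrt (B[0]), A[1] };  /* what _mm_sqrt_sd (A, B) holds */

  printf ("{%g, %g}\n", r[0], r[1]);  /* {4, 3} */
  return 0;
}
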
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmplesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpnltsd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
+ (__v2df)
+ __builtin_ia32_cmpnlesd ((__v2df) __B,
+ (__v2df)
+ __A));
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_sd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_sd (__m128d __A, __m128d __B)
+{
+ return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
+}
+
+/* Create a vector of Qi, where i is the element number. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi64x (long long __q1, long long __q0)
+{
+ return __extension__ (__m128i)(__v2di){ __q0, __q1 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi64 (__m64 __q1, __m64 __q0)
+{
+ return _mm_set_epi64x ((long long)__q1, (long long)__q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
+ short __q3, short __q2, short __q1, short __q0)
+{
+ return __extension__ (__m128i)(__v8hi){
+ __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
+ char __q11, char __q10, char __q09, char __q08,
+ char __q07, char __q06, char __q05, char __q04,
+ char __q03, char __q02, char __q01, char __q00)
+{
+ return __extension__ (__m128i)(__v16qi){
+ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
+ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
+ };
+}
+
+/* Set all of the elements of the vector to A. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi64x (long long __A)
+{
+ return _mm_set_epi64x (__A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 (__A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi32 (int __A)
+{
+ return _mm_set_epi32 (__A, __A, __A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi16 (short __A)
+{
+ return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_epi8 (char __A)
+{
+ return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
+ __A, __A, __A, __A, __A, __A, __A, __A);
+}
+
+/* Create a vector of Qi, where i is the element number.
+ The parameter order is reversed from the _mm_set_epi* functions. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi64 (__m64 __q0, __m64 __q1)
+{
+ return _mm_set_epi64 (__q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
+{
+ return _mm_set_epi32 (__q3, __q2, __q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
+ short __q4, short __q5, short __q6, short __q7)
+{
+ return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
+ char __q04, char __q05, char __q06, char __q07,
+ char __q08, char __q09, char __q10, char __q11,
+ char __q12, char __q13, char __q14, char __q15)
+{
+ return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
+ __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
+}
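+
+/* For illustration: _mm_set_epi32 takes its arguments from the highest
+   element down to element 0, while _mm_setr_epi32 takes them in
+   element (memory) order, so the two calls below build the same
+   vector, with 0 in element 0 and 3 in element 3.
+
+     __m128i __a = _mm_set_epi32 (3, 2, 1, 0);
+     __m128i __b = _mm_setr_epi32 (0, 1, 2, 3);
+*/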
+
+/* Load 128 bits of integer data.  _mm_loadl_epi64 loads only the low
+   64 bits from *P and zeroes the upper half.  */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_si128 (__m128i const *__P)
+{
+ return *__P;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_epi64 (__m128i const *__P)
+{
+ return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+ *__P = __B;
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_si128 (__m128i *__P, __m128i __B)
+{
+ __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_epi64 (__m128i *__P, __m128i __B)
+{
+ *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
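+
+/* For illustration: _mm_load_si128 and _mm_store_si128 require __P to
+   be 16-byte aligned (they map to movdqa), while the *u* forms accept
+   any address (movdqu).  A sketch:
+
+     int __buf[4] __attribute__ ((aligned (16))) = { 1, 2, 3, 4 };
+     __m128i __v = _mm_load_si128 ((__m128i const *) __buf);
+     _mm_storeu_si128 ((__m128i *) __buf, __v);
+*/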
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movepi64_pi64 (__m128i __B)
+{
+ return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movpi64_epi64 (__m64 __A)
+{
+ return _mm_set_epi64 ((__m64)0LL, __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi64 (__m128i __A)
+{
+ return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
+}
+
+/* Create a vector of zeros. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si128 (void)
+{
+ return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_pd (__m128i __A)
+{
+ return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_ps (__m128i __A)
+{
+ return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpd_ps (__m128d __A)
+{
+ return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_epi32 (__m128d __A)
+{
+ return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttpd_pi32 (__m128d __A)
+{
+ return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_pd (__m64 __A)
+{
+ return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_epi32 (__m128 __A)
+{
+ return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pd (__m128 __A)
+{
+ return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si32 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si ((__v2df) __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si64 (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsd_si64x (__m128d __A)
+{
+ return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
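+
+/* For illustration: the cvt forms round according to the current MXCSR
+   rounding mode (round-to-nearest-even by default), while the cvtt
+   forms always truncate toward zero, matching a C cast.
+
+     int __a = _mm_cvtsd_si32 (_mm_set_sd (1.7));    rounds to 2
+     int __b = _mm_cvttsd_si32 (_mm_set_sd (1.7));   truncates to 1
+*/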
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsd_ss (__m128 __A, __m128d __B)
+{
+ return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_sd (__m128d __A, int __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+ return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_sd (__m128d __A, __m128 __B)
+{
+ return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
+{
+ return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
+}
+#else
+#define _mm_shuffle_pd(A, B, N) \
+ ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), (int)(N)))
+#endif
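+
+/* For illustration, a sketch using the _MM_SHUFFLE2 helper defined
+   earlier in this header: bit 0 of the mask picks the element of __A
+   that becomes result element 0, and bit 1 picks the element of __B
+   that becomes result element 1.  The macro form above exists so that
+   the mask is still an immediate when not optimizing.
+
+     __m128d __r = _mm_shuffle_pd (__a, __b, _MM_SHUFFLE2 (0, 1));
+     __r now holds { element 1 of __a, element 0 of __b }
+*/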
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pd (__m128d __A, __m128d __B)
+{
+ return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pd (__m128d __A, double const *__B)
+{
+ return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pd (__m128d __A)
+{
+ return __builtin_ia32_movmskpd ((__v2df)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packus_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_su32 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_epu32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si128 (__m128i __A, const int __N)
+{
+ return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si128 (__m128i __A, const int __N)
+{
+ return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
+}
+#else
+#define _mm_srli_si128(A, N) \
+ ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
+#define _mm_slli_si128(A, N) \
+ ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
+#endif
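+
+/* For illustration: __N counts bytes here, while the underlying
+   builtins count bits, hence the multiplication by 8.  Shifting right
+   by 4 bytes moves 32-bit element 1 down into element 0:
+
+     __m128i __a = _mm_setr_epi32 (10, 20, 30, 40);
+     __m128i __r = _mm_srli_si128 (__a, 4);   __r holds { 20, 30, 40, 0 }
+*/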
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi16 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi32 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_epi64 (__m128i __A, int __B)
+{
+ return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_epi64 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si128 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
+}
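+
+/* For illustration: each comparison yields a per-element mask of all
+   ones (true) or all zeros (false); the cmplt forms above are simply
+   cmpgt with the operands swapped.  Such masks combine with the
+   logical intrinsics, e.g. a signed 32-bit elementwise maximum:
+
+     __m128i __m = _mm_cmpgt_epi32 (__a, __b);
+     __m128i __max = _mm_or_si128 (_mm_and_si128 (__m, __a),
+                                   _mm_andnot_si128 (__m, __b));
+*/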
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi16 (__m128i const __A, int const __N)
+{
+ return __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
+}
+#else
+#define _mm_extract_epi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_insert_epi16(A, D, N) \
+ ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
+ (int)(D), (int)(N)))
+#endif
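+
+/* For illustration: __N selects one of the eight 16-bit elements and
+   must be a compile-time constant in the range 0..7 (the macro forms
+   keep it an immediate when not optimizing).
+
+     int __e = _mm_extract_epi16 (__v, 3);    reads element 3
+     __v = _mm_insert_epi16 (__v, 42, 0);     writes 42 into element 0
+*/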
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_epi8 (__m128i __A)
+{
+ return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shufflehi_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shufflelo_epi16 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi32 (__m128i __A, const int __mask)
+{
+ return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
+}
+#else
+#define _mm_shufflehi_epi16(A, N) \
+ ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shufflelo_epi16(A, N) \
+ ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
+#define _mm_shuffle_epi32(A, N) \
+ ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
+#endif
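+
+/* For illustration, a sketch using the _MM_SHUFFLE helper from
+   xmmintrin.h: each pair of mask bits selects the source element for
+   one destination element, so a mask of _MM_SHUFFLE (0, 0, 0, 0)
+   broadcasts element 0 across the vector.
+
+     __m128i __bcast = _mm_shuffle_epi32 (__a, _MM_SHUFFLE (0, 0, 0, 0));
+*/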
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
+{
+ __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_epu16 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_epu8 (__m128i __A, __m128i __B)
+{
+ return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si32 (int *__A, int __B)
+{
+ __builtin_ia32_movnti (__A, __B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_si128 (__m128i *__A, __m128i __B)
+{
+ __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pd (double *__A, __m128d __B)
+{
+ __builtin_ia32_movntpd (__A, (__v2df)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_clflush (void const *__A)
+{
+ __builtin_ia32_clflush (__A);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lfence (void)
+{
+ __builtin_ia32_lfence ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mfence (void)
+{
+ __builtin_ia32_mfence ();
+}
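+
+/* For illustration: the stream intrinsics perform non-temporal stores
+   that bypass the cache and are weakly ordered with respect to other
+   stores, so a fence should be executed before another processor is
+   told the data is ready.  _mm_stream_si128 also requires a 16-byte
+   aligned destination.
+
+     _mm_stream_si128 (__p, __v);
+     _mm_mfence ();    make the store globally visible before signaling
+*/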
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_si128 (int __A)
+{
+ return _mm_set_epi32 (0, 0, 0, __A);
+}
+
+#ifdef __x86_64__
+/* Intel intrinsic. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_si128 (long long __A)
+{
+ return _mm_set_epi64x (0, __A);
+}
+#endif
+
+/* Casts between various SP, DP, INT vector types. Note that these do no
+ conversion of values, they just change the type. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A)
+{
+ return (__m128d) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A)
+{
+ return (__m128i) __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A)
+{
+ return (__m128) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A)
+{
+ return (__m128d) __A;
+}
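+
+/* For illustration, a sketch: these casts compile to no instructions,
+   so bit-level tricks can hop between domains for free, e.g. clearing
+   the sign bit of both doubles in the integer domain to take an
+   absolute value.
+
+     __m128i __absmask = _mm_set_epi64x (0x7fffffffffffffffLL,
+                                         0x7fffffffffffffffLL);
+     __m128d __abs =
+       _mm_castsi128_pd (_mm_and_si128 (_mm_castpd_si128 (__x),
+                                        __absmask));
+*/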
+
+#endif /* __SSE2__ */
+
+#endif /* _EMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/freebsd.h b/gcc-4.4.0/gcc/config/i386/freebsd.h
new file mode 100644
index 000000000..69f5e0f30
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/freebsd.h
@@ -0,0 +1,140 @@
+/* Definitions for Intel 386 running FreeBSD with ELF format
+ Copyright (C) 1996, 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Eric Youngdale.
+ Modified for stabs-in-ELF by H.J. Lu.
+ Adapted from GNU/Linux version by John Polstra.
+ Continued development by David O'Brien <obrien@freebsd.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (i386 FreeBSD/ELF)");
+
+/* Override the default comment-starter of "/". */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+/* Tell final.c that we don't need a label passed to mcount. */
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME ".mcount"
+
+/* Make gcc agree with <machine/ansi.h>. */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE (TARGET_64BIT ? 32 : BITS_PER_WORD)
+
+#undef SUBTARGET_EXTRA_SPECS /* i386.h bogusly defines it. */
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+/* Provide a STARTFILE_SPEC appropriate for FreeBSD.  Here we add
+   the magical crtbegin.o file (see crtstuff.c) which provides part
+   of the support for getting C++ file-scope static objects constructed
+   before entering `main'.  */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: \
+ %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \
+ %{!p:%{profile:gcrt1.o%s} \
+ %{!profile:crt1.o%s}}}} \
+ crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+/* Provide an ENDFILE_SPEC appropriate for FreeBSD.  Here we tack on
+   the magical crtend.o file (see crtstuff.c) which provides part of
+   the support for getting C++ file-scope static objects constructed
+   before entering `main', followed by a normal "finalizer" file,
+   `crtn.o'.  */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+
+/* Provide a LINK_SPEC appropriate for FreeBSD.  Here we provide support
+   for the special GCC options -static and -shared, which allow us to
+   link things in one of these three modes by applying the appropriate
+   combinations of options at link-time.  We would like to support as
+   many of the other GNU linker options as possible here, but I don't
+   have the time to search for those flags.  I am not sure how to add
+   support for -soname shared_object_name.  H.J.
+
+   I took out %{v:%{!V:-V}}.  It is too much :-(.  They can use
+   -Wl,-V.
+
+   When the -shared link option is used, a final link is not being
+   done.  */
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{p:%nconsider using `-pg' instead of `-p' with gprof(1)} \
+ %{v:-V} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{!dynamic-linker:-dynamic-linker %(fbsd_dynamic_linker) }} \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#undef ASM_OUTPUT_MAX_SKIP_ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
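+
+/* For example, with LOG == 4 and MAX_SKIP == 7 this emits
+   "\t.p2align 4,,7": align to a 16-byte boundary, but only if that
+   inserts at most 7 bytes of padding.  */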
+
+/* Don't default to pcc-struct-return: we want to retain compatibility with
+   older gcc versions, AND pcc-struct-return is nonreentrant (even though
+   the SVR4 ABI for the i386 says that records and unions are returned
+   in memory).  */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* FreeBSD sets the rounding precision of the FPU to 53 bits. Let the
+ compiler get the contents of <float.h> and std::numeric_limits correct. */
+#undef TARGET_96_ROUND_53_LONG_DOUBLE
+#define TARGET_96_ROUND_53_LONG_DOUBLE (!TARGET_64BIT)
diff --git a/gcc-4.4.0/gcc/config/i386/freebsd64.h b/gcc-4.4.0/gcc/config/i386/freebsd64.h
new file mode 100644
index 000000000..69915ea61
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/freebsd64.h
@@ -0,0 +1,43 @@
+/* Definitions for AMD x86-64 running FreeBSD with ELF format
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by David O'Brien <obrien@FreeBSD.org>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (FreeBSD/x86-64 ELF)");
+
+#define SUBTARGET_EXTRA_SPECS \
+ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
+
+/* Provide a LINK_SPEC appropriate for the FreeBSD/x86-64 ELF target.
+ This is a copy of LINK_SPEC from <i386/freebsd.h> tweaked for
+ the x86-64 target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+ %{m32:-m elf_i386_fbsd} \
+ %{v:-V} \
+ %{assert*} %{R*} %{rpath*} %{defsym*} \
+ %{shared:-Bshareable %{h*} %{soname*}} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{!dynamic-linker:-dynamic-linker %(fbsd_dynamic_linker) }} \
+ %{static:-Bstatic}} \
+ %{symbolic:-Bsymbolic}"
diff --git a/gcc-4.4.0/gcc/config/i386/gas.h b/gcc-4.4.0/gcc/config/i386/gas.h
new file mode 100644
index 000000000..f3d43b31e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/gas.h
@@ -0,0 +1,127 @@
+/* Definitions for Intel 386 using GAS.
+ Copyright (C) 1988, 1993, 1994, 1996, 2002, 2004, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Note that i386/seq-gas.h is a GAS configuration that does not use this
+ file. */
+
+/* Use the bsd assembler syntax. */
+/* We need to do this because gas is really a bsd-style assembler,
+ * and so doesn't work well with these att-isms:
+ *
+ * ASM_OUTPUT_SKIP is .set .,.+N, which isn't implemented in gas
+ * ASM_OUTPUT_LOCAL is done with .set .,.+N, but that can't be
+ * used to define bss static space
+ *
+ * Next is the question of whether to use underscores.  RMS didn't
+ * like this idea at first, but since it is now obvious that we
+ * need this separate tm file for use with gas, at least to get
+ * dbx debugging info, I think we should also switch to underscores.
+ * We can keep i386v for real att-style output, and the few
+ * people who want both forms will have to compile twice.
+ */
+
+/* These come from i386/bsd.h, but are specific to Sequent.  */
+#undef DBX_NO_XREFS
+#undef DBX_CONTIN_LENGTH
+
+/* Ask for COFF symbols. */
+
+#define SDB_DEBUGGING_INFO 1
+
+/* Output #ident as a .ident. */
+
+#define ASM_OUTPUT_IDENT(FILE, NAME) fprintf (FILE, "\t.ident \"%s\"\n", NAME);
+
+/* In the past there was confusion as to what the argument to .align was
+ in GAS. For the last several years the rule has been this: for a.out
+ file formats that argument is LOG, and for all other file formats the
+ argument is 1<<LOG.
+
+   However, GAS now has .p2align and .balign pseudo-ops, so to remove any
+   doubt or guesswork, and since this file is used for both a.out and other
+   file formats, we use one of them. */
+
+#ifdef HAVE_GAS_BALIGN_AND_P2ALIGN
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1<<(LOG))
+#endif
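+
+/* For example, ASM_OUTPUT_ALIGN (file, 4) emits "\t.balign 16", i.e.
+   it aligns the location counter to a 16-byte boundary.  */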
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+# define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG) != 0) {\
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* A C statement or statements which output an assembler instruction
+ opcode to the stdio stream STREAM. The macro-operand PTR is a
+ variable of type `char *' which points to the opcode name in its
+ "internal" form--the form that is written in the machine description.
+
+ GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
+ So use `repe' instead. */
+
+#undef ASM_OUTPUT_OPCODE
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+{ \
+ if ((PTR)[0] == 'r' \
+ && (PTR)[1] == 'e' \
+ && (PTR)[2] == 'p') \
+ { \
+ if ((PTR)[3] == 'z') \
+ { \
+ fprintf (STREAM, "repe"); \
+ (PTR) += 4; \
+ } \
+ else if ((PTR)[3] == 'n' && (PTR)[4] == 'z') \
+ { \
+ fprintf (STREAM, "repne"); \
+ (PTR) += 5; \
+ } \
+ } \
+ else \
+ ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR)); \
+}
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't.
+
+ GAS requires the %cl argument, so override i386/unix.h. */
+
+#undef SHIFT_DOUBLE_OMITS_COUNT
+#define SHIFT_DOUBLE_OMITS_COUNT 0
+
+/* Print opcodes the way that GAS expects them. */
+#define GAS_MNEMONICS 1
+
+/* The comment-starter string as GAS expects it. */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
diff --git a/gcc-4.4.0/gcc/config/i386/geode.md b/gcc-4.4.0/gcc/config/i386/geode.md
new file mode 100644
index 000000000..c063e58b9
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/geode.md
@@ -0,0 +1,152 @@
+;; Geode Scheduling
+;; Copyright (C) 2006, 2007
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The Geode architecture is a single-issue processor.
+;;
+;; This description is based on data from the following documents:
+;;
+;; "AMD Geode GX Processor Data Book"
+;; Advanced Micro Devices, Inc., Aug 2005.
+;;
+;; "AMD Geode LX Processor Data Book"
+;; Advanced Micro Devices, Inc., Jan 2006.
+;;
+;;
+;; CPU execution units of the Geode:
+;;
+;; issue describes the issue pipeline.
+;; alu describes the integer unit.
+;; fpu describes the FP unit.
+;;
+;; The FP unit is an out-of-order execution unit with register renaming.
+;; There is also a memory management unit and an execution pipeline for
+;; load/store operations.  We ignore them, as well as the difference
+;; between insns that use memory and those that use registers.
+
+(define_automaton "geode")
+
+(define_cpu_unit "geode_issue,geode_alu,geode_fpu" "geode")
+
+(define_insn_reservation "alu" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "alu,alu1,negnot,icmp,lea,test,imov,imovx,icmov,incdec,setcc"))
+ "geode_issue,geode_alu")
+
+(define_insn_reservation "shift" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1"))
+ "geode_issue,geode_alu*2")
+
+(define_insn_reservation "imul" 7
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "imul"))
+ "geode_issue,geode_alu*7")
+
+(define_insn_reservation "idiv" 40
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "idiv"))
+ "geode_issue,geode_alu*40")
+
+;; The branch unit.
+(define_insn_reservation "call" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "call,callv"))
+ "geode_issue,geode_alu*2")
+
+(define_insn_reservation "geode_branch" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "ibr"))
+ "geode_issue,geode_alu")
+
+(define_insn_reservation "geode_pop_push" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "pop,push"))
+ "geode_issue,geode_alu")
+
+(define_insn_reservation "geode_leave" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "leave"))
+ "geode_issue,geode_alu*2")
+
+(define_insn_reservation "geode_load_str" 4
+ (and (eq_attr "cpu" "geode")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ "geode_issue,geode_alu*4")
+
+(define_insn_reservation "geode_store_str" 2
+ (and (eq_attr "cpu" "geode")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "store")))
+ "geode_issue,geode_alu*2")
+
+;; Be optimistic
+(define_insn_reservation "geode_unknown" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "multi,other"))
+ "geode_issue,geode_alu")
+
+;; FPU
+
+(define_insn_reservation "geode_fop" 6
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fop,fcmp"))
+ "geode_issue,geode_fpu*6")
+
+(define_insn_reservation "geode_fsimple" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fmov,fcmov,fsgn,fxch"))
+ "geode_issue,geode_fpu")
+
+(define_insn_reservation "geode_fist" 4
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fistp,fisttp"))
+ "geode_issue,geode_fpu*4")
+
+(define_insn_reservation "geode_fmul" 10
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fmul"))
+ "geode_issue,geode_fpu*10")
+
+(define_insn_reservation "geode_fdiv" 47
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fdiv"))
+ "geode_issue,geode_fpu*47")
+
+;; We use minimal latency (fsin) here
+(define_insn_reservation "geode_fpspc" 54
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "fpspc"))
+ "geode_issue,geode_fpu*54")
+
+(define_insn_reservation "geode_frndint" 12
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "frndint"))
+ "geode_issue,geode_fpu*12")
+
+(define_insn_reservation "geode_mmxmov" 1
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "mmxmov"))
+ "geode_issue,geode_fpu")
+
+(define_insn_reservation "geode_mmx" 2
+ (and (eq_attr "cpu" "geode")
+ (eq_attr "type" "mmx,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"))
+ "geode_issue,geode_fpu*2")
diff --git a/gcc-4.4.0/gcc/config/i386/gmm_malloc.h b/gcc-4.4.0/gcc/config/i386/gmm_malloc.h
new file mode 100644
index 000000000..7a7e84069
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/gmm_malloc.h
@@ -0,0 +1,74 @@
+/* Copyright (C) 2004, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+#include <errno.h>
+
+static __inline__ void*
+_mm_malloc (size_t size, size_t align)
+{
+ void * malloc_ptr;
+ void * aligned_ptr;
+
+ /* Error if align is not a power of two. */
+ if (align & (align - 1))
+ {
+ errno = EINVAL;
+ return ((void*) 0);
+ }
+
+ if (size == 0)
+ return ((void *) 0);
+
+  /* Assume the pointer returned by malloc is aligned to at least
+     sizeof (void *).  If necessary, add another sizeof (void *) to
+     store the value returned by malloc just below the aligned block.
+     Effectively this enforces a minimum alignment of
+     2 * sizeof (void *).  */
+ if (align < 2 * sizeof (void *))
+ align = 2 * sizeof (void *);
+
+ malloc_ptr = malloc (size + align);
+ if (!malloc_ptr)
+ return ((void *) 0);
+
+  /* Align the pointer; we have at least sizeof (void *) of space
+     below the malloc'd pointer.  */
+ aligned_ptr = (void *) (((size_t) malloc_ptr + align)
+ & ~((size_t) (align) - 1));
+
+  /* Store the original pointer just below the aligned pointer.  */
+ ((void **) aligned_ptr) [-1] = malloc_ptr;
+
+ return aligned_ptr;
+}
+
+static __inline__ void
+_mm_free (void * aligned_ptr)
+{
+ if (aligned_ptr)
+ free (((void **) aligned_ptr) [-1]);
+}
+
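+/* For illustration, a minimal usage sketch: request a 16-byte-aligned
+   block for four floats, then release it with the matching free.
+
+     float *__buf = (float *) _mm_malloc (4 * sizeof (float), 16);
+     if (__buf)
+       {
+         ... use __buf with aligned loads and stores ...
+         _mm_free (__buf);
+       }
+*/
+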
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/gmon-sol2.c b/gcc-4.4.0/gcc/config/i386/gmon-sol2.c
new file mode 100644
index 000000000..d20762156
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/gmon-sol2.c
@@ -0,0 +1,459 @@
+/*-
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. [rescinded 22 July 1999]
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a modified gmon.c by J.W.Hawtin <oolon@ankh.org>,
+ * 14/8/96, based on the original gmon.c in GCC and the hacked
+ * Solaris 2 SPARC version (config/sparc/gmon-sol.c) by Mark Eichin,
+ * to do process profiling on Solaris 2.x x86.
+ *
+ * It must be used in conjunction with sol2-gc1.asm, which is used to start
+ * and stop process monitoring.
+ *
+ * Differences.
+ *
+ * On Solaris 2, _mcount is called by library functions, not mcount, so
+ * support has been added for both.
+ *
+ * Also, the prototype for profil() is different.
+ *
+ * Solaris 2 does not seem to have char *minbrk, which allows the setting
+ * of the minimum SBRK region, so this code has been removed; let's pray
+ * malloc does not mess it up.
+ *
+ * Notes
+ *
+ * This code could easily be integrated with the original gmon.c and perhaps
+ * should be.
+ */
+#include "tconfig.h"
+#include "tsystem.h"
+#include <fcntl.h> /* for creat() */
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+static void moncontrol (int);
+extern void monstartup (char *, char *);
+extern void _mcleanup (void);
+extern void internal_mcount (
+#ifdef __x86_64__
+ char *, unsigned short *
+#else
+ void
+#endif
+ );
+
+
+struct phdr {
+ char *lpc;
+ char *hpc;
+ int ncnt;
+};
+
+
+#define HISTFRACTION 2
+#define HISTCOUNTER unsigned short
+#define HASHFRACTION 1
+#define ARCDENSITY 2
+#define MINARCS 50
+#define BASEADDRESS 0x8000000 /* On Solaris 2 X86 all executables start here
+ and not at 0 */
+
+struct tostruct {
+ char *selfpc;
+ long count;
+ unsigned short link;
+};
+
+struct rawarc {
+ unsigned long raw_frompc;
+ unsigned long raw_selfpc;
+ long raw_count;
+};
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
+
+/* char *minbrk; */
+
+typedef __SIZE_TYPE__ size_t;
+typedef __PTRDIFF_TYPE__ intptr_t;
+
+ /*
+ * froms is actually a bunch of unsigned shorts indexing tos
+ */
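+ /*
+ * Each froms[] slot covers a HASHFRACTION-sized bucket of caller text
+ * addresses and holds the tos[] index of the first call arc recorded
+ * from that bucket; each tostruct then records one callee (selfpc),
+ * its call count, and a link to the next arc out of the same bucket.
+ */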
+static int profiling = 3;
+static unsigned short *froms;
+static struct tostruct *tos = 0;
+static long tolimit = 0;
+static char *s_lowpc = 0;
+static char *s_highpc = 0;
+static size_t s_textsize = 0;
+
+static int ssiz;
+static char *sbuf;
+static int s_scale;
+ /* see profil(2) where this is described (incorrectly) */
+#define SCALE_1_TO_1 0x10000L
+
+#define MSG "No space for profiling buffer(s)\n"
+
+extern int errno;
+
+extern void *sbrk (intptr_t);
+
+void
+monstartup(char *lowpc, char *highpc)
+{
+ size_t monsize;
+ char *buffer;
+ register size_t o;
+
+ /*
+ * round lowpc and highpc to multiples of the density we're using
+ * so the rest of the scaling (here and in gprof) stays in ints.
+ */
+ lowpc = (char *)
+ ROUNDDOWN((size_t)lowpc, HISTFRACTION*sizeof(HISTCOUNTER));
+ s_lowpc = lowpc;
+ highpc = (char *)
+ ROUNDUP((size_t)highpc, HISTFRACTION*sizeof(HISTCOUNTER));
+ s_highpc = highpc;
+ s_textsize = highpc - lowpc;
+ monsize = (s_textsize / HISTFRACTION) + sizeof(struct phdr);
+ buffer = (char *) sbrk( monsize );
+ if ( buffer == (char *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ return;
+ }
+ froms = (unsigned short *) sbrk( s_textsize / HASHFRACTION );
+ if ( froms == (unsigned short *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ froms = 0;
+ return;
+ }
+ tolimit = s_textsize * ARCDENSITY / 100;
+ if ( tolimit < MINARCS ) {
+ tolimit = MINARCS;
+ } else if ( tolimit > 65534 ) {
+ tolimit = 65534;
+ }
+ tos = (struct tostruct *) sbrk( tolimit * sizeof( struct tostruct ) );
+ if ( tos == (struct tostruct *) -1 ) {
+ write( 2 , MSG , sizeof(MSG) );
+ froms = 0;
+ tos = 0;
+ return;
+ }
+/* minbrk = (char *) sbrk(0);*/
+ tos[0].link = 0;
+ sbuf = buffer;
+ ssiz = monsize;
+ ( (struct phdr *) buffer ) -> lpc = lowpc;
+ ( (struct phdr *) buffer ) -> hpc = highpc;
+ ( (struct phdr *) buffer ) -> ncnt = ssiz;
+ monsize -= sizeof(struct phdr);
+ if ( monsize <= 0 )
+ return;
+ o = highpc - lowpc;
+ if( monsize < o )
+#ifndef hp300
+ s_scale = ( (float) monsize / o ) * SCALE_1_TO_1;
+#else /* avoid floating point */
+ {
+ int quot = o / monsize;
+
+ if (quot >= 0x10000)
+ s_scale = 1;
+ else if (quot >= 0x100)
+ s_scale = 0x10000 / quot;
+ else if (o >= 0x800000)
+ s_scale = 0x1000000 / (o / (monsize >> 8));
+ else
+ s_scale = 0x1000000 / ((o << 8) / monsize);
+ }
+#endif
+ else
+ s_scale = SCALE_1_TO_1;
+ moncontrol(1);
+}
+
+void
+_mcleanup (void)
+{
+ int fd;
+ int fromindex;
+ int endfrom;
+ char *frompc;
+ int toindex;
+ struct rawarc rawarc;
+
+ moncontrol(0);
+ fd = creat( "gmon.out" , 0666 );
+ if ( fd < 0 ) {
+ perror( "mcount: gmon.out" );
+ return;
+ }
+# ifdef DEBUG
+ fprintf( stderr , "[mcleanup] sbuf 0x%x ssiz %d\n" , sbuf , ssiz );
+# endif /* DEBUG */
+
+ write( fd , sbuf , ssiz );
+ endfrom = s_textsize / (HASHFRACTION * sizeof(*froms));
+ for ( fromindex = 0 ; fromindex < endfrom ; fromindex++ ) {
+ if ( froms[fromindex] == 0 ) {
+ continue;
+ }
+ frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof(*froms));
+ for (toindex=froms[fromindex]; toindex!=0; toindex=tos[toindex].link) {
+# ifdef DEBUG
+ fprintf( stderr ,
+ "[mcleanup] frompc 0x%x selfpc 0x%x count %d\n" ,
+ frompc , tos[toindex].selfpc , tos[toindex].count );
+# endif /* DEBUG */
+ rawarc.raw_frompc = (unsigned long) frompc;
+ rawarc.raw_selfpc = (unsigned long) tos[toindex].selfpc;
+ rawarc.raw_count = tos[toindex].count;
+ write( fd , &rawarc , sizeof rawarc );
+ }
+ }
+ close( fd );
+}
+
+#ifdef __x86_64__
+/* See GLIBC for additional information about this technique. */
+asm(".globl _mcount\n"
+ "\t.type\t_mcount, @function\n"
+ "_mcount:\n"
+ /* The compiler calls _mcount after the prologue, and does not
+ save any of the registers. Therefore we must preserve all
+ seven registers which may contain function arguments. */
+ "\tsubq\t$0x38,%rsp\n"
+ "\tmovq\t%rax,(%rsp)\n"
+ "\tmovq\t%rcx,0x08(%rsp)\n"
+ "\tmovq\t%rdx,0x10(%rsp)\n"
+ "\tmovq\t%rsi,0x18(%rsp)\n"
+ "\tmovq\t%rdi,0x20(%rsp)\n"
+ "\tmovq\t%r8,0x28(%rsp)\n"
+ "\tmovq\t%r9,0x30(%rsp)\n"
+ /* Get SELFPC (pushed by the call to this function) and
+ FROMPCINDEX (via the frame pointer).  */
+ "\tmovq\t0x38(%rsp),%rdi\n"
+ "\tmovq\t0x8(%rbp),%rsi\n"
+ "\tcallq\tinternal_mcount\n"
+ /* Restore the saved registers. */
+ "\tmovq\t0x30(%rsp),%r9\n"
+ "\tmovq\t0x28(%rsp),%r8\n"
+ "\tmovq\t0x20(%rsp),%rdi\n"
+ "\tmovq\t0x18(%rsp),%rsi\n"
+ "\tmovq\t0x10(%rsp),%rdx\n"
+ "\tmovq\t0x08(%rsp),%rdx\n"
+ "\tmovq\t(%rsp),%rax\n"
+ "\taddq\t$0x38,%rsp\n"
+ "\tretq\n"
+ );
+#else
+/* Solaris 2 libraries use _mcount. */
+asm(".globl _mcount; _mcount: jmp internal_mcount");
+/* This is for compatibility with old versions of gcc which used mcount. */
+asm(".globl mcount; mcount: jmp internal_mcount");
+#endif
+
+void
+internal_mcount (
+#ifdef __x86_64__
+ char *selfpc,
+ unsigned short *frompcindex
+#else
+ void
+#endif
+ )
+{
+#ifndef __x86_64__
+ register char *selfpc;
+ register unsigned short *frompcindex;
+#endif
+ register struct tostruct *top;
+ register struct tostruct *prevtop;
+ register long toindex;
+ static char already_setup;
+
+#ifndef __x86_64__
+ /*
+ * find the return address for mcount,
+ * and the return address for mcount's caller.
+ */
+
+ /* selfpc = pc pushed by mcount call.
+ This identifies the function that was just entered. */
+ selfpc = (void *) __builtin_return_address (0);
+ /* frompcindex = pc in preceding frame.
+ This identifies the caller of the function just entered. */
+ frompcindex = (void *) __builtin_return_address (1);
+#endif
+
+ if(!already_setup) {
+ extern char etext[];
+ already_setup = 1;
+#ifdef __x86_64__
+ monstartup(0, etext);
+#else
+ monstartup((char*)0x08040000, etext);
+#endif
+#ifdef USE_ONEXIT
+ on_exit(_mcleanup, 0);
+#else
+ atexit(_mcleanup);
+#endif
+ }
+ /*
+ * check that we are profiling
+ * and that we aren't recursively invoked.
+ */
+ if (profiling) {
+ goto out;
+ }
+ profiling++;
+ /*
+ * check that frompcindex is a reasonable pc value.
+ * for example: signal catchers get called from the stack,
+ * not from text space. too bad.
+ */
+ frompcindex = (unsigned short *)((long)frompcindex - (long)s_lowpc);
+ if ((unsigned long)frompcindex > s_textsize) {
+ goto done;
+ }
+ frompcindex =
+ &froms[((long)frompcindex) / (HASHFRACTION * sizeof(*froms))];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++tos[0].link;
+ if (toindex >= tolimit) {
+ goto overflow;
+ }
+ *frompcindex = toindex;
+ top = &tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = 0;
+ goto done;
+ }
+ top = &tos[toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count++;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * prevtop points to previous top.
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and none of the chain
+ * had top->selfpc == selfpc.
+ * so we allocate a new tostruct
+ * and link it to the head of the chain.
+ */
+ toindex = ++tos[0].link;
+ if (toindex >= tolimit) {
+ goto overflow;
+ }
+ top = &tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ prevtop = top;
+ top = &tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * increment its count
+ * move it to the head of the chain.
+ */
+ top->count++;
+ toindex = prevtop->link;
+ prevtop->link = top->link;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+
+ }
+done:
+ profiling--;
+ /* and fall through */
+out:
+ return; /* normal return restores saved registers */
+
+overflow:
+ profiling++; /* halt further profiling */
+# define TOLIMIT "mcount: tos overflow\n"
+ write(2, TOLIMIT, sizeof(TOLIMIT));
+ goto out;
+}
+
+/*
+ * Control profiling
+ * profiling is what mcount checks to see if
+ * all the data structures are ready.
+ */
+static void
+moncontrol(int mode)
+{
+ if (mode)
+ {
+ /* start */
+ profil((unsigned short *)(sbuf + sizeof(struct phdr)),
+ ssiz - sizeof(struct phdr),
+ (size_t)s_lowpc, s_scale);
+
+ profiling = 0;
+ } else {
+ /* stop */
+ profil((unsigned short *)0, 0, 0, 0);
+ profiling = 3;
+ }
+}
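
internal_mcount above hashes the caller's pc into the froms array: one unsigned short slot per HASHFRACTION * sizeof(short) bytes of text, with each slot heading a chain of tostruct arcs. A small sketch of the index computation, assuming HASHFRACTION is 2 as in classic gmon (the actual value is defined in a header outside this diff):

#include <stdio.h>

#define HASHFRACTION 2           /* assumed; defined in gmon.h */

/* Slot in 'froms' for a caller pc, as internal_mcount computes it.  */
static unsigned long
froms_index (unsigned long frompc, unsigned long lowpc)
{
  return (frompc - lowpc) / (HASHFRACTION * sizeof (unsigned short));
}

int
main (void)
{
  /* A caller at 0x08049230 in text starting at 0x08040000.  */
  printf ("slot %lu\n", froms_index (0x08049230UL, 0x08040000UL));
  return 0;
}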
diff --git a/gcc-4.4.0/gcc/config/i386/gnu.h b/gcc-4.4.0/gcc/config/i386/gnu.h
new file mode 100644
index 000000000..077933bba
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/gnu.h
@@ -0,0 +1,56 @@
+/* Configuration for an i386 running GNU with ELF as the target machine. */
+
+/*
+Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+2005, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#undef GLIBC_DYNAMIC_LINKER
+#define GLIBC_DYNAMIC_LINKER "/lib/ld.so"
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (i386 GNU)");
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{pthread:-D_REENTRANT} %{posix:-D_POSIX_SOURCE} %{bsd:-D_BSD_SOURCE}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu)"
+
+#undef STARTFILE_SPEC
+#if defined HAVE_LD_PIE
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt0.o%s;pie:Scrt1.o%s;static:crt0.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#else
+#define STARTFILE_SPEC \
+ "%{!shared: %{pg|p|profile:gcrt0.o%s;static:crt0.o%s;:crt1.o%s}} \
+ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
+#endif
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+/* FIXME: Is a Hurd-specific fallback mechanism necessary? */
+#undef MD_UNWIND_SUPPORT
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* Not supported yet. */
+#undef TARGET_THREAD_SSP_OFFSET
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/gstabs.h b/gcc-4.4.0/gcc/config/i386/gstabs.h
new file mode 100644
index 000000000..e9a621871
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/gstabs.h
@@ -0,0 +1,7 @@
+/* We do not want to output SDB debugging information. */
+
+#undef SDB_DEBUGGING_INFO
+
+/* We want to output DBX debugging information. */
+
+#define DBX_DEBUGGING_INFO 1
diff --git a/gcc-4.4.0/gcc/config/i386/gthr-win32.c b/gcc-4.4.0/gcc/config/i386/gthr-win32.c
new file mode 100644
index 000000000..46ecb0d4b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/gthr-win32.c
@@ -0,0 +1,260 @@
+/* Implementation of W32-specific threads compatibility routines for
+ libgcc2. */
+
+/* Copyright (C) 1999, 2000, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by Mumit Khan <khan@xraylith.wisc.edu>.
+ Modified and moved to separate file by Danny Smith
+ <dannysmith@users.sourceforge.net>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <windows.h>
+#ifndef __GTHREAD_HIDE_WIN32API
+# define __GTHREAD_HIDE_WIN32API 1
+#endif
+#undef __GTHREAD_I486_INLINE_LOCK_PRIMITIVES
+#define __GTHREAD_I486_INLINE_LOCK_PRIMITIVES
+#include <gthr-win32.h>
+
+/* Windows32 threads specific definitions.  The windows32 threading model
+   does not map well onto GCC's pthread-inspired threading model, and so
+   there are caveats one needs to be aware of.
+
+ 1. The destructor supplied to __gthread_key_create is ignored for
+ generic x86-win32 ports. This will certainly cause memory leaks
+ due to unreclaimed eh contexts (sizeof (eh_context) is at least
+ 24 bytes for x86 currently).
+
+ This memory leak may be significant for long-running applications
+ that make heavy use of C++ EH.
+
+   However, the Mingw runtime (version 0.3 or newer) provides a mechanism
+   to emulate pthreads key dtors; the runtime provides a special DLL,
+   linked in if the -mthreads option is specified, that runs the dtors in
+   the reverse order of registration when each thread exits.  If the
+   -mthreads option is not given, a stub is linked in instead of the
+   DLL, which results in a memory leak.  Other x86-win32 ports can of
+   course use the same technique to avoid the leak.
+
+   2. The error codes returned are not POSIX-like, and are cast into
+   ints.  This may cause incorrect error returns due to truncation of
+   values on hardware where sizeof (DWORD) > sizeof (int).
+
+ 3. We are currently using a special mutex instead of the Critical
+ Sections, since Win9x does not support TryEnterCriticalSection
+ (while NT does).
+
+ The basic framework should work well enough. In the long term, GCC
+ needs to use Structured Exception Handling on Windows32. */
+
+int
+__gthr_win32_once (__gthread_once_t *once, void (*func) (void))
+{
+ if (once == NULL || func == NULL)
+ return EINVAL;
+
+ if (! once->done)
+ {
+ if (InterlockedIncrement (&(once->started)) == 0)
+ {
+ (*func) ();
+ once->done = TRUE;
+ }
+ else
+ {
+ /* Another thread is currently executing the code, so wait for it
+ to finish; yield the CPU in the meantime. If performance
+ does become an issue, the solution is to use an Event that
+ we wait on here (and set above), but that implies a place to
+ create the event before this routine is called. */
+ while (! once->done)
+ Sleep (0);
+ }
+ }
+ return 0;
+}
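
The once control relies on started being initialized to -1, so the first thread's InterlockedIncrement returns 0 and runs the function while later callers spin on done. A hedged usage sketch (Windows only; __GTHREAD_ONCE_INIT is assumed to provide that initial state, as GCC's gthr-win32.h does):

#include <windows.h>
#include <gthr-win32.h>

static __gthread_once_t init_once = __GTHREAD_ONCE_INIT;

static void
init_state (void)
{
  /* One-time initialization runs in exactly one thread.  */
}

void
ensure_initialized (void)
{
  __gthr_win32_once (&init_once, init_state);
}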
+
+/* Windows32 thread local keys don't support destructors; this leads to
+   leaks, especially in threaded applications making extensive use of
+   C++ EH.  Mingw uses a thread-support DLL to work around this problem.  */
+
+int
+__gthr_win32_key_create (__gthread_key_t *key,
+ void (*dtor) (void *) __attribute__((unused)))
+{
+ int status = 0;
+ DWORD tls_index = TlsAlloc ();
+ if (tls_index != 0xFFFFFFFF)
+ {
+ *key = tls_index;
+#ifdef MINGW32_SUPPORTS_MT_EH
+ /* Mingw runtime will run the dtors in reverse order for each thread
+ when the thread exits. */
+ status = __mingwthr_key_dtor (*key, dtor);
+#endif
+ }
+ else
+ status = (int) GetLastError ();
+ return status;
+}
+
+int
+__gthr_win32_key_delete (__gthread_key_t key)
+{
+ return (TlsFree (key) != 0) ? 0 : (int) GetLastError ();
+}
+
+void *
+__gthr_win32_getspecific (__gthread_key_t key)
+{
+ DWORD lasterror;
+ void *ptr;
+ lasterror = GetLastError();
+ ptr = TlsGetValue(key);
+ SetLastError( lasterror );
+ return ptr;
+}
+
+int
+__gthr_win32_setspecific (__gthread_key_t key, const void *ptr)
+{
+ if (TlsSetValue (key, CONST_CAST2(void *, const void *, ptr)) != 0)
+ return 0;
+ else
+ return GetLastError ();
+}
+
+void
+__gthr_win32_mutex_init_function (__gthread_mutex_t *mutex)
+{
+ mutex->counter = -1;
+ mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL);
+}
+
+void
+__gthr_win32_mutex_destroy (__gthread_mutex_t *mutex)
+{
+ CloseHandle ((HANDLE) mutex->sema);
+}
+
+int
+__gthr_win32_mutex_lock (__gthread_mutex_t *mutex)
+{
+ if (InterlockedIncrement (&mutex->counter) == 0 ||
+ WaitForSingleObject (mutex->sema, INFINITE) == WAIT_OBJECT_0)
+ return 0;
+ else
+ {
+ /* WaitForSingleObject returns WAIT_FAILED, and we can only do
+ some best-effort cleanup here. */
+ InterlockedDecrement (&mutex->counter);
+ return 1;
+ }
+}
+
+int
+__gthr_win32_mutex_trylock (__gthread_mutex_t *mutex)
+{
+ if (__GTHR_W32_InterlockedCompareExchange (&mutex->counter, 0, -1) < 0)
+ return 0;
+ else
+ return 1;
+}
+
+int
+__gthr_win32_mutex_unlock (__gthread_mutex_t *mutex)
+{
+ if (InterlockedDecrement (&mutex->counter) >= 0)
+ return ReleaseSemaphore (mutex->sema, 1, NULL) ? 0 : 1;
+ else
+ return 0;
+}
+
+void
+__gthr_win32_recursive_mutex_init_function (__gthread_recursive_mutex_t *mutex)
+{
+ mutex->counter = -1;
+ mutex->depth = 0;
+ mutex->owner = 0;
+ mutex->sema = CreateSemaphore (NULL, 0, 65535, NULL);
+}
+
+int
+__gthr_win32_recursive_mutex_lock (__gthread_recursive_mutex_t *mutex)
+{
+ DWORD me = GetCurrentThreadId();
+ if (InterlockedIncrement (&mutex->counter) == 0)
+ {
+ mutex->depth = 1;
+ mutex->owner = me;
+ }
+ else if (mutex->owner == me)
+ {
+ InterlockedDecrement (&mutex->counter);
+ ++(mutex->depth);
+ }
+ else if (WaitForSingleObject (mutex->sema, INFINITE) == WAIT_OBJECT_0)
+ {
+ mutex->depth = 1;
+ mutex->owner = me;
+ }
+ else
+ {
+ /* WaitForSingleObject returns WAIT_FAILED, and we can only do
+ some best-effort cleanup here. */
+ InterlockedDecrement (&mutex->counter);
+ return 1;
+ }
+ return 0;
+}
+
+int
+__gthr_win32_recursive_mutex_trylock (__gthread_recursive_mutex_t *mutex)
+{
+ DWORD me = GetCurrentThreadId();
+ if (__GTHR_W32_InterlockedCompareExchange (&mutex->counter, 0, -1) < 0)
+ {
+ mutex->depth = 1;
+ mutex->owner = me;
+ }
+ else if (mutex->owner == me)
+ ++(mutex->depth);
+ else
+ return 1;
+
+ return 0;
+}
+
+int
+__gthr_win32_recursive_mutex_unlock (__gthread_recursive_mutex_t *mutex)
+{
+ --(mutex->depth);
+ if (mutex->depth == 0)
+ {
+ mutex->owner = 0;
+
+ if (InterlockedDecrement (&mutex->counter) >= 0)
+ return ReleaseSemaphore (mutex->sema, 1, NULL) ? 0 : 1;
+ }
+
+ return 0;
+}
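
All of the mutexes above share one counter protocol: counter starts at -1, the first locker's InterlockedIncrement brings it to 0 and succeeds without touching the semaphore, and only contended lockers wait on (and unlockers release) the semaphore. A single-threaded model of the counter arithmetic, for illustration only (real code needs the interlocked primitives):

#include <stdio.h>

int
main (void)
{
  long counter = -1;             /* unlocked */

  /* Lock: increment; reaching 0 means the fast path succeeded.  */
  if (++counter == 0)
    printf ("fast-path lock acquired\n");

  /* A second locker would push counter to 1 and block on the semaphore.  */

  /* Unlock: decrement; >= 0 means a waiter exists and must be released.  */
  if (--counter >= 0)
    printf ("would ReleaseSemaphore here\n");
  else
    printf ("no waiters\n");
  return 0;
}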
diff --git a/gcc-4.4.0/gcc/config/i386/host-cygwin.c b/gcc-4.4.0/gcc/config/i386/host-cygwin.c
new file mode 100755
index 000000000..1f831a6d4
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/host-cygwin.c
@@ -0,0 +1,80 @@
+/* Cygwin host-specific hook definitions.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include <sys/mman.h>
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "toplev.h"
+#include "diagnostic.h"
+
+static void * cygwin_gt_pch_get_address (size_t, int fd);
+static size_t cygwin_gt_pch_alloc_granularity (void);
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS cygwin_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY
+#define HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY cygwin_gt_pch_alloc_granularity
+
+/* Granularity for reserving address space. */
+static const size_t va_granularity = 0x10000;
+
+/* Return the alignment required for allocating virtual memory. */
+static size_t
+cygwin_gt_pch_alloc_granularity (void)
+{
+ return va_granularity;
+}
+
+/* Identify an address that's likely to be free in a subsequent invocation
+ of the compiler. The area should be able to hold SIZE bytes. FD is an
+ open file descriptor if the host would like to probe with mmap. */
+static void *
+cygwin_gt_pch_get_address (size_t sz, int fd)
+{
+ void *base;
+ off_t p = lseek(fd, 0, SEEK_CUR);
+
+ if (p == (off_t) -1)
+ fatal_error ("can't get position in PCH file: %m");
+
+ /* Cygwin requires that the underlying file be at least
+ as large as the requested mapping. */
+ if ((size_t) p < sz)
+ {
+ if ( ftruncate (fd, sz) == -1 )
+ fatal_error ("can't extend PCH file: %m");
+ }
+
+ base = mmap (NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+ if (base == MAP_FAILED)
+ base = NULL;
+ else
+ munmap (base, sz);
+
+ if (lseek (fd, p, SEEK_SET) == (off_t) -1 )
+ fatal_error ("can't set position in PCH file: %m");
+
+ return base;
+}
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
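
cygwin_gt_pch_get_address probes by mapping the PCH file, recording the address, and immediately unmapping, on the theory that the same range will still be free in the next compiler invocation. A minimal POSIX sketch of the probe pattern, independent of the PCH machinery:

#include <stdio.h>
#include <sys/mman.h>

/* Find an address likely to be free for a later fixed-address mapping.  */
static void *
probe_address (size_t sz)
{
  void *base = mmap (NULL, sz, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED)
    return NULL;
  munmap (base, sz);        /* release the range; only the address matters */
  return base;
}

int
main (void)
{
  printf ("candidate base: %p\n", probe_address ((size_t) 1 << 20));
  return 0;
}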
diff --git a/gcc-4.4.0/gcc/config/i386/host-i386-darwin.c b/gcc-4.4.0/gcc/config/i386/host-i386-darwin.c
new file mode 100644
index 000000000..03a19aa4c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/host-i386-darwin.c
@@ -0,0 +1,30 @@
+/* i386-darwin host-specific hook definitions.
+ Copyright (C) 2003, 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "config/host-darwin.h"
+
+/* Darwin doesn't do anything special for x86 hosts; this file exists just
+ to include config/host-darwin.h. */
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
diff --git a/gcc-4.4.0/gcc/config/i386/host-mingw32.c b/gcc-4.4.0/gcc/config/i386/host-mingw32.c
new file mode 100644
index 000000000..1d2e59ab5
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/host-mingw32.c
@@ -0,0 +1,175 @@
+/* mingw32 host-specific hook definitions.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "hosthooks.h"
+#include "hosthooks-def.h"
+#include "toplev.h"
+#include "diagnostic.h"
+
+
+#define WIN32_LEAN_AND_MEAN /* Not so important if we have windows.h.gch. */
+#include <windows.h>
+
+static void * mingw32_gt_pch_get_address (size_t, int);
+static int mingw32_gt_pch_use_address (void *, size_t, int, size_t);
+static size_t mingw32_gt_pch_alloc_granularity (void);
+
+#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
+#define HOST_HOOKS_GT_PCH_GET_ADDRESS mingw32_gt_pch_get_address
+#undef HOST_HOOKS_GT_PCH_USE_ADDRESS
+#define HOST_HOOKS_GT_PCH_USE_ADDRESS mingw32_gt_pch_use_address
+#undef HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY
+#define HOST_HOOKS_GT_PCH_ALLOC_GRANULARITY mingw32_gt_pch_alloc_granularity
+
+static inline void w32_error(const char*, const char*, int, const char*);
+
+/* FIXME: Is this big enough? */
+static const size_t pch_VA_max_size = 128 * 1024 * 1024;
+
+/* Granularity for reserving address space. */
+static const size_t va_granularity = 0x10000;
+
+/* Print out the GetLastError() translation. */
+static inline void
+w32_error (const char* function, const char* file, int line,
+ const char* my_msg)
+{
+ LPSTR w32_msgbuf;
+ FormatMessageA (FORMAT_MESSAGE_ALLOCATE_BUFFER
+ | FORMAT_MESSAGE_FROM_SYSTEM
+ | FORMAT_MESSAGE_IGNORE_INSERTS
+ | FORMAT_MESSAGE_MAX_WIDTH_MASK,
+ NULL, GetLastError(),
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ (LPSTR) &w32_msgbuf, 0, NULL);
+ fprintf(stderr, "internal error in %s, at %s:%d: %s: %s\n",
+ function, trim_filename (file), line, my_msg, w32_msgbuf);
+ LocalFree ((HLOCAL)w32_msgbuf);
+}
+
+/* Granularity for reserving address space. */
+static size_t mingw32_gt_pch_alloc_granularity (void)
+{
+ return va_granularity;
+}
+
+/* Identify an address that's likely to be free in a subsequent invocation
+ of the compiler. The area should be able to hold SIZE bytes. FD is an
+ open file descriptor if the host would like to probe with mmap. */
+
+static void *
+mingw32_gt_pch_get_address (size_t size, int fd ATTRIBUTE_UNUSED)
+{
+ void* res;
+ size = (size + va_granularity - 1) & ~(va_granularity - 1);
+ if (size > pch_VA_max_size)
+ return NULL;
+
+  /* FIXME: We let the system determine the base by setting the first
+     arg to NULL.  Allocating at the top of the available address space
+     avoids unnecessary fragmentation of "ordinary" (malloc's) address
+     space, but may not be safe with delayed loading of system DLLs.
+     Preferred addresses for NT system DLLs are in the 0x70000000 to
+     0x78000000 range.  If we allocate at the bottom, we need to reserve
+     the address as early as possible and at the same point in each
+     invocation.  */
+
+ res = VirtualAlloc (NULL, pch_VA_max_size,
+ MEM_RESERVE | MEM_TOP_DOWN,
+ PAGE_NOACCESS);
+ if (!res)
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "VirtualAlloc");
+ else
+ /* We do not need the address space for now, so free it. */
+ VirtualFree (res, 0, MEM_RELEASE);
+
+ return res;
+}
+
+/* ADDR is an address returned by gt_pch_get_address. Attempt to allocate
+ SIZE bytes at the same address and load it with the data from FD at
+ OFFSET. Return -1 if we couldn't allocate memory at ADDR, return 0
+ if the memory is allocated but the data not loaded, return 1 if done. */
+
+static int
+mingw32_gt_pch_use_address (void *addr, size_t size, int fd,
+ size_t offset)
+{
+ void * mmap_addr;
+ HANDLE mmap_handle;
+
+  /* Apparently, MS Vista puts unnamed file mapping objects into the
+     Global namespace when running an application in a Terminal Server
+     session.  This causes failure since, by default, applications
+     don't get SeCreateGlobalPrivilege.  We don't need global memory
+     sharing, so we explicitly put the object into the Local namespace.
+
+     There is also another issue, which appears if multiple concurrent
+     GCC processes are using PCH functionality.  MapViewOfFileEx returns
+     an "Access Denied" error.  So we need to make the session-wide
+     mapping name unique.  Let's use the current process ID for that.  */
+#define OBJECT_NAME_FMT "Local\\MinGWGCCPCH-"
+
+ /* Allocate enough space for name prefix and max possible DWORD
+ hexadecimal representation. */
+ char object_name[sizeof (OBJECT_NAME_FMT) + sizeof (DWORD) * 2];
+ snprintf (object_name, sizeof (object_name), OBJECT_NAME_FMT "%lx",
+ GetCurrentProcessId());
+
+ /* However, the documentation for CreateFileMapping says that on NT4
+ and earlier, backslashes are invalid in object name. So, we need
+ to check if we are on Windows2000 or higher. */
+ OSVERSIONINFO version_info;
+
+ if (size == 0)
+ return 0;
+
+  /* Offset must also be a multiple of the allocation granularity for
+     this to work.  We can't change the offset.  */
+ if ((offset & (va_granularity - 1)) != 0 || size > pch_VA_max_size)
+ return -1;
+
+ /* Determine the version of Windows we are running on. */
+ version_info.dwOSVersionInfoSize = sizeof (version_info);
+ GetVersionEx (&version_info);
+
+ mmap_handle = CreateFileMappingA ((HANDLE) _get_osfhandle (fd), NULL,
+ PAGE_WRITECOPY | SEC_COMMIT, 0, 0,
+ version_info.dwMajorVersion > 4
+ ? object_name : NULL);
+ if (mmap_handle == NULL)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "CreateFileMapping");
+ return -1;
+ }
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, addr);
+ if (mmap_addr != addr)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
+ CloseHandle(mmap_handle);
+ return -1;
+ }
+
+ return 1;
+}
+
+const struct host_hooks host_hooks = HOST_HOOKS_INITIALIZER;
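
mingw32_gt_pch_get_address uses the idiom (size + g - 1) & ~(g - 1) to round a request up to the next multiple of the 64 KiB allocation granularity; it only works when the granularity is a power of two. A standalone check:

#include <stdio.h>
#include <stddef.h>

static size_t
round_up (size_t size, size_t granularity)  /* granularity: power of two */
{
  return (size + granularity - 1) & ~(granularity - 1);
}

int
main (void)
{
  printf ("%zu\n", round_up (70000, 0x10000));  /* prints 131072 */
  return 0;
}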
diff --git a/gcc-4.4.0/gcc/config/i386/i386-aout.h b/gcc-4.4.0/gcc/config/i386/i386-aout.h
new file mode 100644
index 000000000..e28f28c02
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-aout.h
@@ -0,0 +1,25 @@
+/* Definitions for "naked" Intel 386 using a.out (or coff encap'd
+ a.out) object format and stabs debugging info.
+
+ Copyright (C) 1994, 2002, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (80386, BSD a.out syntax)");
+
+/* end of i386-aout.h */
diff --git a/gcc-4.4.0/gcc/config/i386/i386-c.c b/gcc-4.4.0/gcc/config/i386/i386-c.c
new file mode 100644
index 000000000..3d17c104e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-c.c
@@ -0,0 +1,351 @@
+/* Subroutines used for macro/preprocessor support on the ia-32.
+ Copyright (C) 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "flags.h"
+#include "c-common.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "cpplib.h"
+#include "c-pragma.h"
+
+static bool ix86_pragma_target_parse (tree, tree);
+static void ix86_target_macros_internal
+ (int, enum processor_type, enum processor_type, enum fpmath_unit,
+ void (*def_or_undef) (cpp_reader *, const char *));
+
+
+/* Internal function to either define or undef the appropriate system
+ macros. */
+static void
+ix86_target_macros_internal (int isa_flag,
+ enum processor_type arch,
+ enum processor_type tune,
+ enum fpmath_unit fpmath,
+ void (*def_or_undef) (cpp_reader *,
+ const char *))
+{
+  /* For some of the k6/pentium variants there weren't separate ISA bits to
+     identify which tune/arch flag was passed, so figure it out here.  */
+ size_t arch_len = strlen (ix86_arch_string);
+ size_t tune_len = strlen (ix86_tune_string);
+ int last_arch_char = ix86_arch_string[arch_len - 1];
+ int last_tune_char = ix86_tune_string[tune_len - 1];
+
+ /* Built-ins based on -march=. */
+ switch (arch)
+ {
+ case PROCESSOR_I386:
+ break;
+ case PROCESSOR_I486:
+ def_or_undef (parse_in, "__i486");
+ def_or_undef (parse_in, "__i486__");
+ break;
+ case PROCESSOR_PENTIUM:
+ def_or_undef (parse_in, "__i586");
+ def_or_undef (parse_in, "__i586__");
+ def_or_undef (parse_in, "__pentium");
+ def_or_undef (parse_in, "__pentium__");
+ if (isa_flag & OPTION_MASK_ISA_MMX)
+ def_or_undef (parse_in, "__pentium_mmx__");
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ def_or_undef (parse_in, "__i686");
+ def_or_undef (parse_in, "__i686__");
+ def_or_undef (parse_in, "__pentiumpro");
+ def_or_undef (parse_in, "__pentiumpro__");
+ break;
+ case PROCESSOR_GEODE:
+ def_or_undef (parse_in, "__geode");
+ def_or_undef (parse_in, "__geode__");
+ break;
+ case PROCESSOR_K6:
+ def_or_undef (parse_in, "__k6");
+ def_or_undef (parse_in, "__k6__");
+ if (last_arch_char == '2')
+ def_or_undef (parse_in, "__k6_2__");
+ else if (last_arch_char == '3')
+ def_or_undef (parse_in, "__k6_3__");
+ else if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ def_or_undef (parse_in, "__k6_3__");
+ break;
+ case PROCESSOR_ATHLON:
+ def_or_undef (parse_in, "__athlon");
+ def_or_undef (parse_in, "__athlon__");
+ if (isa_flag & OPTION_MASK_ISA_SSE)
+ def_or_undef (parse_in, "__athlon_sse__");
+ break;
+ case PROCESSOR_K8:
+ def_or_undef (parse_in, "__k8");
+ def_or_undef (parse_in, "__k8__");
+ break;
+ case PROCESSOR_AMDFAM10:
+ def_or_undef (parse_in, "__amdfam10");
+ def_or_undef (parse_in, "__amdfam10__");
+ break;
+ case PROCESSOR_PENTIUM4:
+ def_or_undef (parse_in, "__pentium4");
+ def_or_undef (parse_in, "__pentium4__");
+ break;
+ case PROCESSOR_NOCONA:
+ def_or_undef (parse_in, "__nocona");
+ def_or_undef (parse_in, "__nocona__");
+ break;
+ case PROCESSOR_CORE2:
+ def_or_undef (parse_in, "__core2");
+ def_or_undef (parse_in, "__core2__");
+ break;
+      /* Use PROCESSOR_max to avoid setting/unsetting the arch macro.  */
+ case PROCESSOR_max:
+ break;
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ gcc_unreachable ();
+ }
+
+ /* Built-ins based on -mtune=. */
+ switch (tune)
+ {
+ case PROCESSOR_I386:
+ def_or_undef (parse_in, "__tune_i386__");
+ break;
+ case PROCESSOR_I486:
+ def_or_undef (parse_in, "__tune_i486__");
+ break;
+ case PROCESSOR_PENTIUM:
+ def_or_undef (parse_in, "__tune_i586__");
+ def_or_undef (parse_in, "__tune_pentium__");
+ if (last_tune_char == 'x')
+ def_or_undef (parse_in, "__tune_pentium_mmx__");
+ break;
+ case PROCESSOR_PENTIUMPRO:
+ def_or_undef (parse_in, "__tune_i686__");
+ def_or_undef (parse_in, "__tune_pentiumpro__");
+ switch (last_tune_char)
+ {
+ case '3':
+ def_or_undef (parse_in, "__tune_pentium3__");
+ /* FALLTHRU */
+ case '2':
+ def_or_undef (parse_in, "__tune_pentium2__");
+ break;
+ }
+ break;
+ case PROCESSOR_GEODE:
+ def_or_undef (parse_in, "__tune_geode__");
+ break;
+ case PROCESSOR_K6:
+ def_or_undef (parse_in, "__tune_k6__");
+ if (last_tune_char == '2')
+ def_or_undef (parse_in, "__tune_k6_2__");
+ else if (last_tune_char == '3')
+ def_or_undef (parse_in, "__tune_k6_3__");
+ else if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ def_or_undef (parse_in, "__tune_k6_3__");
+ break;
+ case PROCESSOR_ATHLON:
+ def_or_undef (parse_in, "__tune_athlon__");
+ if (isa_flag & OPTION_MASK_ISA_SSE)
+ def_or_undef (parse_in, "__tune_athlon_sse__");
+ break;
+ case PROCESSOR_K8:
+ def_or_undef (parse_in, "__tune_k8__");
+ break;
+ case PROCESSOR_AMDFAM10:
+ def_or_undef (parse_in, "__tune_amdfam10__");
+ break;
+ case PROCESSOR_PENTIUM4:
+ def_or_undef (parse_in, "__tune_pentium4__");
+ break;
+ case PROCESSOR_NOCONA:
+ def_or_undef (parse_in, "__tune_nocona__");
+ break;
+ case PROCESSOR_CORE2:
+ def_or_undef (parse_in, "__tune_core2__");
+ break;
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ break;
+      /* Use PROCESSOR_max to avoid setting/unsetting the tune macro.  */
+ case PROCESSOR_max:
+ break;
+ }
+
+ if (isa_flag & OPTION_MASK_ISA_MMX)
+ def_or_undef (parse_in, "__MMX__");
+ if (isa_flag & OPTION_MASK_ISA_3DNOW)
+ def_or_undef (parse_in, "__3dNOW__");
+ if (isa_flag & OPTION_MASK_ISA_3DNOW_A)
+ def_or_undef (parse_in, "__3dNOW_A__");
+ if (isa_flag & OPTION_MASK_ISA_SSE)
+ def_or_undef (parse_in, "__SSE__");
+ if (isa_flag & OPTION_MASK_ISA_SSE2)
+ def_or_undef (parse_in, "__SSE2__");
+ if (isa_flag & OPTION_MASK_ISA_SSE3)
+ def_or_undef (parse_in, "__SSE3__");
+ if (isa_flag & OPTION_MASK_ISA_SSSE3)
+ def_or_undef (parse_in, "__SSSE3__");
+ if (isa_flag & OPTION_MASK_ISA_SSE4_1)
+ def_or_undef (parse_in, "__SSE4_1__");
+ if (isa_flag & OPTION_MASK_ISA_SSE4_2)
+ def_or_undef (parse_in, "__SSE4_2__");
+ if (isa_flag & OPTION_MASK_ISA_AES)
+ def_or_undef (parse_in, "__AES__");
+ if (isa_flag & OPTION_MASK_ISA_PCLMUL)
+ def_or_undef (parse_in, "__PCLMUL__");
+ if (isa_flag & OPTION_MASK_ISA_AVX)
+ def_or_undef (parse_in, "__AVX__");
+ if (isa_flag & OPTION_MASK_ISA_FMA)
+ def_or_undef (parse_in, "__FMA__");
+ if (isa_flag & OPTION_MASK_ISA_SSE4A)
+ def_or_undef (parse_in, "__SSE4A__");
+ if (isa_flag & OPTION_MASK_ISA_SSE5)
+ def_or_undef (parse_in, "__SSE5__");
+ if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
+ def_or_undef (parse_in, "__SSE_MATH__");
+ if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
+ def_or_undef (parse_in, "__SSE2_MATH__");
+}
+
+
+/* Hook to validate the current #pragma GCC target and set the state, and
+ update the macros based on what was changed. If ARGS is NULL, then
+ POP_TARGET is used to reset the options. */
+
+static bool
+ix86_pragma_target_parse (tree args, tree pop_target)
+{
+ tree prev_tree = build_target_option_node ();
+ tree cur_tree;
+ struct cl_target_option *prev_opt;
+ struct cl_target_option *cur_opt;
+ int prev_isa;
+ int cur_isa;
+ int diff_isa;
+ enum processor_type prev_arch;
+ enum processor_type prev_tune;
+ enum processor_type cur_arch;
+ enum processor_type cur_tune;
+
+ if (! args)
+ {
+ cur_tree = ((pop_target)
+ ? pop_target
+ : target_option_default_node);
+ cl_target_option_restore (TREE_TARGET_OPTION (cur_tree));
+ }
+ else
+ {
+ cur_tree = ix86_valid_target_attribute_tree (args);
+ if (!cur_tree)
+ return false;
+ }
+
+ target_option_current_node = cur_tree;
+
+ /* Figure out the previous/current isa, arch, tune and the differences. */
+ prev_opt = TREE_TARGET_OPTION (prev_tree);
+ cur_opt = TREE_TARGET_OPTION (cur_tree);
+ prev_isa = prev_opt->ix86_isa_flags;
+ cur_isa = cur_opt->ix86_isa_flags;
+ diff_isa = (prev_isa ^ cur_isa);
+ prev_arch = prev_opt->arch;
+ prev_tune = prev_opt->tune;
+ cur_arch = cur_opt->arch;
+ cur_tune = cur_opt->tune;
+
+ /* If the same processor is used for both previous and current options, don't
+ change the macros. */
+ if (cur_arch == prev_arch)
+ cur_arch = prev_arch = PROCESSOR_max;
+
+ if (cur_tune == prev_tune)
+ cur_tune = prev_tune = PROCESSOR_max;
+
+  /* Undef all of the macros that are no longer current.  */
+ ix86_target_macros_internal (prev_isa & diff_isa,
+ prev_arch,
+ prev_tune,
+ prev_opt->fpmath,
+ cpp_undef);
+
+ /* Define all of the macros for new options that were just turned on. */
+ ix86_target_macros_internal (cur_isa & diff_isa,
+ cur_arch,
+ cur_tune,
+ cur_opt->fpmath,
+ cpp_define);
+
+ return true;
+}
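
ix86_pragma_target_parse touches only the macros whose state actually changed: diff_isa = prev ^ cur marks every flipped ISA bit, prev & diff selects bits to undefine, and cur & diff selects bits to define. A small standalone illustration with made-up flag values (the real OPTION_MASK_ISA_* constants live in generated option headers):

#include <stdio.h>

#define ISA_SSE2 0x1             /* hypothetical bit values */
#define ISA_AVX  0x2
#define ISA_MMX  0x4

int
main (void)
{
  int prev = ISA_SSE2 | ISA_MMX; /* old target options */
  int cur  = ISA_AVX  | ISA_MMX; /* new target options */
  int diff = prev ^ cur;         /* bits that changed in either direction */

  if (prev & diff & ISA_SSE2)
    printf ("undef __SSE2__\n"); /* was on, now off */
  if (cur & diff & ISA_AVX)
    printf ("define __AVX__\n"); /* was off, now on */
  /* ISA_MMX did not change, so its macro is left untouched.  */
  return 0;
}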
+
+/* Function to tell the preprocessor about the defines for the current target. */
+
+void
+ix86_target_macros (void)
+{
+ /* 32/64-bit won't change with target specific options, so do the assert and
+ builtin_define_std calls here. */
+ if (TARGET_64BIT)
+ {
+ cpp_assert (parse_in, "cpu=x86_64");
+ cpp_assert (parse_in, "machine=x86_64");
+ cpp_define (parse_in, "__amd64");
+ cpp_define (parse_in, "__amd64__");
+ cpp_define (parse_in, "__x86_64");
+ cpp_define (parse_in, "__x86_64__");
+ }
+ else
+ {
+ cpp_assert (parse_in, "cpu=i386");
+ cpp_assert (parse_in, "machine=i386");
+ builtin_define_std ("i386");
+ }
+
+ ix86_target_macros_internal (ix86_isa_flags,
+ ix86_arch,
+ ix86_tune,
+ ix86_fpmath,
+ cpp_define);
+}
+
+
+/* Register target pragmas. We need to add the hook for parsing #pragma GCC
+ option here rather than in i386.c since it will pull in various preprocessor
+ functions, and those are not present in languages like fortran without a
+ preprocessor. */
+
+void
+ix86_register_pragmas (void)
+{
+ /* Update pragma hook to allow parsing #pragma GCC target. */
+ targetm.target_option.pragma_parse = ix86_pragma_target_parse;
+
+#ifdef REGISTER_SUBTARGET_PRAGMAS
+ REGISTER_SUBTARGET_PRAGMAS ();
+#endif
+}
diff --git a/gcc-4.4.0/gcc/config/i386/i386-coff.h b/gcc-4.4.0/gcc/config/i386/i386-coff.h
new file mode 100644
index 000000000..af0204bb5
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-coff.h
@@ -0,0 +1,69 @@
+/* Definitions for "naked" Intel 386 using coff object format files
+ and coff debugging info.
+
+ Copyright (C) 1994, 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (80386, COFF BSD syntax)");
+
+#define TARGET_OS_CPP_BUILTINS() /* Sweet FA. */
+
+/* We want to be able to get DBX debugging information via -gstabs. */
+
+#define DBX_DEBUGGING_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE SDB_DEBUG
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_coff_asm_named_section
+
+/* Prefix for internally generated assembler labels. If we aren't using
+ underscores, we are using prefix `.'s to identify labels that should
+ be ignored, as in `i386/gas.h' --karl@cs.umb.edu */
+
+#undef LPREFIX
+#define LPREFIX ".L"
+
+/* The prefix to add to user-visible assembler symbols. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX ""
+
+/* If user-symbols don't have underscores,
+ then it must take more than `L' to identify
+ a label that should be ignored. */
+
+/* This is how to store into the string BUF
+ the symbol_ref name of an internal numbered label where
+ PREFIX is the class of label and NUM is the number within the class.
+ This is suitable for output with `assemble_name'. */
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(BUF,PREFIX,NUMBER) \
+ sprintf ((BUF), ".%s%ld", (PREFIX), (long)(NUMBER))
+
+/* GNU as expects alignment to be the number of bytes instead of the log for
+ COFF targets. */
+
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d\n", 1<<(LOG))
+
+/* end of i386-coff.h */
diff --git a/gcc-4.4.0/gcc/config/i386/i386-interix.h b/gcc-4.4.0/gcc/config/i386/i386-interix.h
new file mode 100644
index 000000000..380b46c3d
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-interix.h
@@ -0,0 +1,362 @@
+/* Target definitions for GCC for Intel 80386 running Interix
+ Parts Copyright (C) 1991, 1999, 2000, 2002, 2003, 2004, 2007, 2008
+ Free Software Foundation, Inc.
+
+ Parts:
+ by Douglas B. Rupp (drupp@cs.washington.edu).
+ by Ron Guilmette (rfg@netcom.com).
+ by Donn Terry (donn@softway.com).
+ by Mumit Khan (khan@xraylith.wisc.edu).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The rest must follow. */
+
+#define DBX_DEBUGGING_INFO 1
+#define SDB_DEBUGGING_INFO 1
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+#define HANDLE_SYSV_PRAGMA 1
+#undef HANDLE_PRAGMA_WEAK /* until the link format can handle it */
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+   returns float values in the 387, and needs stack probes.
+   We also align doubles to 64 bits for MSVC default compatibility,
+   and we do bitfields MSVC-compatibly by default, too.  */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE | \
+ MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
+
+#undef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_i486
+
+#define WCHAR_TYPE_SIZE 16
+#define WCHAR_TYPE "short unsigned int"
+
+/* WinNT (and thus Interix) use unsigned int */
+#define SIZE_TYPE "unsigned int"
+
+#define ASM_LOAD_ADDR(loc, reg) " leal " #loc "," #reg "\n"
+
+#define TARGET_DECLSPEC 1
+
+/* cpp handles __STDC__ */
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__INTERIX"); \
+ builtin_define ("__OPENNT"); \
+ builtin_define ("_M_IX86=300"); \
+ builtin_define ("_X86_=1"); \
+ builtin_define ("__stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("__cdecl=__attribute__((__cdecl__))"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=interix"); \
+ if (preprocessing_asm_p ()) \
+ builtin_define_std ("LANGUAGE_ASSEMBLY"); \
+ else \
+ { \
+ builtin_define_std ("LANGUAGE_C"); \
+ if (c_dialect_cxx ()) \
+ builtin_define_std ("LANGUAGE_C_PLUS_PLUS"); \
+ if (c_dialect_objc ()) \
+ builtin_define_std ("LANGUAGE_OBJECTIVE_C"); \
+ } \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+/* Write out the correct language type definition for the header files.
+ Unless we have assembler language, write out the symbols for C.
+   mieee is an Alpha-specific variant; cross-pollination is a bad idea.  */
+#define CPP_SPEC "-remap %{posix:-D_POSIX_SOURCE} \
+-isystem %$INTERIX_ROOT/usr/include"
+
+#define TARGET_VERSION fprintf (stderr, " (i386 Interix)");
+
+/* The global __fltused is necessary to cause the printf/scanf routines
+ for outputting/inputting floating point numbers to be loaded. Since this
+ is kind of hard to detect, we just do it all the time. */
+#undef X86_FILE_START_FLTUSED
+#define X86_FILE_START_FLTUSED 1
+
+/* A table of byte codes used by the ASM_OUTPUT_ASCII and
+ ASM_OUTPUT_LIMITED_STRING macros. Each byte in the table
+ corresponds to a particular byte value [0..255]. For any
+ given byte value, if the value in the corresponding table
+ position is zero, the given character can be output directly.
+ If the table value is 1, the byte must be output as a \ooo
+ octal escape. If the tables value is anything else, then the
+ byte value should be output as a \ followed by the value
+ in the table. Note that we can use standard UN*X escape
+ sequences for many control characters, but we don't use
+ \a to represent BEL because some svr4 assemblers (e.g. on
+ the i386) don't know about that. Also, we don't use \v
+ since some versions of gas, such as 2.2 did not accept it. */
+
+#define ESCAPES \
+"\1\1\1\1\1\1\1\1btn\1fr\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\0\0\"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\\\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\
+\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"
+
+/* Some svr4 assemblers have a limit on the number of characters which
+ can appear in the operand of a .string directive. If your assembler
+ has such a limitation, you should define STRING_LIMIT to reflect that
+ limit. Note that at least some svr4 assemblers have a limit on the
+ actual number of bytes in the double-quoted string, and that they
+ count each character in an escape sequence as one byte. Thus, an
+ escape sequence like \377 would count as four bytes.
+
+ If your target assembler doesn't support the .string directive, you
+ should define this to zero.
+*/
+
+#define STRING_LIMIT ((unsigned) 256)
+
+#define STRING_ASM_OP "\t.string\t"
+
+/* The routine used to output NUL terminated strings. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable, especially for targets like the i386
+ (where the only alternative is to output character sequences as
+ comma separated lists of numbers). */
+
+#define ASM_OUTPUT_LIMITED_STRING(FILE, STR) \
+ do \
+ { \
+ const unsigned char *_limited_str = \
+ (const unsigned char *) (STR); \
+ unsigned ch; \
+ fprintf ((FILE), "%s\"", STRING_ASM_OP); \
+ for (; (ch = *_limited_str); _limited_str++) \
+ { \
+ int escape = ESCAPES[ch]; \
+ switch (escape) \
+ { \
+ case 0: \
+ putc (ch, (FILE)); \
+ break; \
+ case 1: \
+ fprintf ((FILE), "\\%03o", ch); \
+ break; \
+ default: \
+ putc ('\\', (FILE)); \
+ putc (escape, (FILE)); \
+ break; \
+ } \
+ } \
+ fprintf ((FILE), "\"\n"); \
+ } \
+ while (0)
+
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
+ do \
+ { \
+ const unsigned char *_ascii_bytes = \
+ (const unsigned char *) (STR); \
+ const unsigned char *limit = _ascii_bytes + (LENGTH); \
+ unsigned bytes_in_chunk = 0; \
+ for (; _ascii_bytes < limit; _ascii_bytes++) \
+ { \
+ const unsigned char *p; \
+ if (bytes_in_chunk >= 64) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \
+ continue; \
+ if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT) \
+ { \
+ if (bytes_in_chunk > 0) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \
+ _ascii_bytes = p; \
+ } \
+ else \
+ { \
+ if (bytes_in_chunk == 0) \
+ fprintf ((FILE), "\t.byte\t"); \
+ else \
+ fputc (',', (FILE)); \
+ fprintf ((FILE), "0x%02x", *_ascii_bytes); \
+ bytes_in_chunk += 5; \
+ } \
+ } \
+ if (bytes_in_chunk > 0) \
+ fprintf ((FILE), "\n"); \
+ } \
+ while (0)
+
+/* Emit code to check the stack when allocating more than 4096
+   bytes in one go.  */
+
+#define CHECK_STACK_LIMIT 0x1000
+
+/* the following are OSF linker (not gld) specific... we don't want them */
+#undef HAS_INIT_SECTION
+#undef LD_INIT_SWITCH
+#undef LD_FINI_SWITCH
+
+/* The following are needed for us to be able to use winnt.c, but are not
+ otherwise meaningful to Interix. (The functions that use these are
+ never called because we don't do DLLs.) */
+#define TARGET_NOP_FUN_DLLIMPORT 1
+#define drectve_section() /* nothing */
+
+/* Objective-C has its own packing rules...
+ Objc tries to parallel the code in stor-layout.c at runtime
+ (see libobjc/encoding.c). This (compile-time) packing info isn't
+ available at runtime, so it's hopeless to try.
+
+ And if the user tries to set the flag for objc, give an error
+ so he has some clue. */
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+do { \
+ if (strcmp (lang_hooks.name, "GNU Objective-C") == 0) \
+ { \
+ if ((target_flags & MASK_MS_BITFIELD_LAYOUT) != 0 \
+ && (target_flags_explicit & MASK_MS_BITFIELD_LAYOUT) != 0) \
+ { \
+ error ("ms-bitfields not supported for objc"); \
+ } \
+ target_flags &= ~MASK_MS_BITFIELD_LAYOUT; \
+ } \
+} while (0)
+
+#define EH_FRAME_IN_DATA_SECTION
+
+#define READONLY_DATA_SECTION_ASM_OP "\t.section\t.rdata,\"r\""
+
+/* The MS compilers take alignment as a number of bytes, so we do as well.  */
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1<<(LOG))
+
+/* The linker will take care of this, and having them causes problems with
+ ld -r (specifically -rU). */
+#define CTOR_LISTS_DEFINED_EXTERNALLY 1
+
+#define SET_ASM_OP "\t.set\t"
+/* Output a definition (implements alias) */
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+do \
+{ \
+ fprintf ((FILE), "%s", SET_ASM_OP); \
+ assemble_name (FILE, LABEL1); \
+ fprintf (FILE, ","); \
+ assemble_name (FILE, LABEL2); \
+ fprintf (FILE, "\n"); \
+ } \
+while (0)
+
+#define HOST_PTR_AS_INT unsigned long
+
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* The following two flags are usually "off" for i386, because some non-gnu
+ tools (for the i386) don't handle them. However, we don't have that
+ problem, so.... */
+
+/* Forward references to tags are allowed. */
+#define SDB_ALLOW_FORWARD_REFERENCES
+
+/* Unknown tags are also allowed. */
+#define SDB_ALLOW_UNKNOWN_REFERENCES
+
+/* The integer half of this list needs to be constant.  However, there's
+   a lot of disagreement about what the floating point adjustments should
+   be.  We pick one that works with gdb.  (The underlying problem is
+   what to do about the segment registers.  Since we have access to them
+   from /proc, we'll allow them to be accessed in gdb, even though the
+   gcc compiler can't generate them.  There's some evidence that
+   MSVC does, but possibly only for certain special "canned" sequences.)  */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+(TARGET_64BIT ? dbx64_register_map[n] \
+ : (n) == 0 ? 0 \
+ : (n) == 1 ? 2 \
+ : (n) == 2 ? 1 \
+ : (n) == 3 ? 3 \
+ : (n) == 4 ? 6 \
+ : (n) == 5 ? 7 \
+ : (n) == 6 ? 5 \
+ : (n) == 7 ? 4 \
+ : ((n) >= FIRST_STACK_REG && (n) <= LAST_STACK_REG) ? (n)+8 \
+ : (-1))
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in). */
+
+#define SUBTARGET_ENCODE_SECTION_INFO i386_pe_encode_section_info
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING i386_pe_strip_name_encoding_full
+
+#if 0
+/* Turn this back on when the linker is updated to handle grouped
+ .data$ sections correctly. See corresponding note in i386/interix.c.
+ MK. */
+
+/* Interix uses explicit import from shared libraries. */
+#define MULTIPLE_SYMBOL_SPACES 1
+
+extern void i386_pe_unique_section (tree, int);
+#define TARGET_ASM_UNIQUE_SECTION i386_pe_unique_section
+#define TARGET_ASM_FUNCTION_RODATA_SECTION default_no_function_rodata_section
+
+#define SUPPORTS_ONE_ONLY 1
+#endif /* 0 */
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_pe_asm_named_section
+
+/* DWARF2 Unwinding doesn't work with exception handling yet. */
+#define DWARF2_UNWIND_INFO 0
+
+/* Don't assume anything about the header files. */
+#define NO_IMPLICIT_EXTERN_C
+
+/* MSVC returns structs of up to 8 bytes via registers. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ (TYPE_MODE (TYPE) == BLKmode \
+   || (AGGREGATE_TYPE_P (TYPE) && int_size_in_bytes (TYPE) > 8 ))
\ No newline at end of file
diff --git a/gcc-4.4.0/gcc/config/i386/i386-interix3.h b/gcc-4.4.0/gcc/config/i386/i386-interix3.h
new file mode 100644
index 000000000..abd202c91
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-interix3.h
@@ -0,0 +1,23 @@
+/* Target definitions for GCC for Intel 80386 running Interix V3.
+ Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+ Contributed by Douglas B. Rupp (rupp@gnat.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}"
+
diff --git a/gcc-4.4.0/gcc/config/i386/i386-modes.def b/gcc-4.4.0/gcc/config/i386/i386-modes.def
new file mode 100644
index 000000000..9c948026f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-modes.def
@@ -0,0 +1,92 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 2002, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The x86_64 ABI specifies both XF and TF modes.
+   XFmode (__float80) is IEEE extended precision; TFmode (__float128)
+   is IEEE quad precision.  */
+
+FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
+FLOAT_MODE (TF, 16, ieee_quad_format);
+
+/* In ILP32 mode, XFmode has size 12 and alignment 4.
+ In LP64 mode, XFmode has size and alignment 16. */
+ADJUST_FLOAT_FORMAT (XF, (TARGET_128BIT_LONG_DOUBLE
+ ? &ieee_extended_intel_128_format
+ : TARGET_96_ROUND_53_LONG_DOUBLE
+ ? &ieee_extended_intel_96_round_53_format
+ : &ieee_extended_intel_96_format));
+ADJUST_BYTESIZE (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 12);
+ADJUST_ALIGNMENT (XF, TARGET_128BIT_LONG_DOUBLE ? 16 : 4);
+
+/* Add any extra modes needed to represent the condition code.
+
+ For the i386, we need separate modes when floating-point
+ equality comparisons are being done.
+
+ Add CCNO to indicate comparisons against zero that requires
+ Overflow flag to be unset. Sign bit test is used instead and
+ thus can be used to form "a&b>0" type of tests.
+
+ Add CCGC to indicate comparisons against zero that allows
+ unspecified garbage in the Carry flag. This mode is used
+ by inc/dec instructions.
+
+ Add CCGOC to indicate comparisons against zero that allows
+ unspecified garbage in the Carry and Overflow flag. This
+ mode is used to simulate comparisons of (a-b) and (a+b)
+ against zero using sub/cmp/add operations.
+
+ Add CCA to indicate that only the Above flag is valid.
+ Add CCC to indicate that only the Carry flag is valid.
+ Add CCO to indicate that only the Overflow flag is valid.
+ Add CCS to indicate that only the Sign flag is valid.
+ Add CCZ to indicate that only the Zero flag is valid. */
+
+CC_MODE (CCGC);
+CC_MODE (CCGOC);
+CC_MODE (CCNO);
+CC_MODE (CCA);
+CC_MODE (CCC);
+CC_MODE (CCO);
+CC_MODE (CCS);
+CC_MODE (CCZ);
+CC_MODE (CCFP);
+CC_MODE (CCFPU);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
+VECTOR_MODE (INT, DI, 1); /* V1DI */
+VECTOR_MODE (INT, SI, 1); /* V1SI */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (INT, HI, 32); /* V32HI */
+VECTOR_MODE (INT, QI, 64); /* V64QI */
+VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
+
+INT_MODE (OI, 32);
+
+/* The symbol Pmode stands for one of the above machine modes (usually SImode).
+ The tm.h file specifies which one. It is not a distinct mode. */
diff --git a/gcc-4.4.0/gcc/config/i386/i386-protos.h b/gcc-4.4.0/gcc/config/i386/i386-protos.h
new file mode 100644
index 000000000..f0fa7ee46
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386-protos.h
@@ -0,0 +1,280 @@
+/* Definitions of target machine for GCC for IA-32.
+   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999,
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Functions in i386.c */
+extern void override_options (bool);
+extern void optimization_options (int, int);
+
+extern int ix86_can_use_return_insn_p (void);
+extern int ix86_frame_pointer_required (void);
+extern void ix86_setup_frame_addresses (void);
+
+extern void ix86_file_end (void);
+extern int ix86_can_eliminate (int, int);
+extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
+extern void ix86_expand_prologue (void);
+extern void ix86_expand_epilogue (int);
+
+extern void ix86_output_addr_vec_elt (FILE *, int);
+extern void ix86_output_addr_diff_elt (FILE *, int, int);
+
+#ifdef RTX_CODE
+extern int standard_80387_constant_p (rtx);
+extern const char *standard_80387_constant_opcode (rtx);
+extern rtx standard_80387_constant_rtx (int);
+extern int standard_sse_constant_p (rtx);
+extern const char *standard_sse_constant_opcode (rtx, rtx);
+extern int symbolic_reference_mentioned_p (rtx);
+extern bool extended_reg_mentioned_p (rtx);
+extern bool x86_extended_QIreg_mentioned_p (rtx);
+extern bool x86_extended_reg_mentioned_p (rtx);
+extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
+
+extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
+extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
+extern int ix86_expand_strlen (rtx, rtx, rtx, rtx);
+
+extern bool legitimate_constant_p (rtx);
+extern bool constant_address_p (rtx);
+extern bool legitimate_pic_operand_p (rtx);
+extern int legitimate_pic_address_disp_p (rtx);
+extern int legitimate_address_p (enum machine_mode, rtx, int);
+extern rtx legitimize_address (rtx, rtx, enum machine_mode);
+
+extern void print_reg (rtx, int, FILE*);
+extern void print_operand (FILE*, rtx, int);
+extern void print_operand_address (FILE*, rtx);
+extern bool output_addr_const_extra (FILE*, rtx);
+
+extern void split_di (rtx[], int, rtx[], rtx[]);
+extern void split_ti (rtx[], int, rtx[], rtx[]);
+
+extern const char *output_set_got (rtx, rtx);
+extern const char *output_387_binary_op (rtx, rtx*);
+extern const char *output_387_reg_move (rtx, rtx*);
+extern const char *output_fix_trunc (rtx, rtx*, int);
+extern const char *output_fp_compare (rtx, rtx*, int, int);
+
+extern void ix86_expand_clear (rtx);
+extern void ix86_expand_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
+extern void ix86_expand_push (enum machine_mode, rtx);
+extern rtx ix86_fixup_binary_operands (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern void ix86_fixup_binary_operands_no_copy (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern void ix86_expand_binary_operator (enum rtx_code,
+ enum machine_mode, rtx[]);
+extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
+ rtx[]);
+extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
+extern void ix86_split_convert_uns_si_sse (rtx[]);
+extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
+extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
+extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
+extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
+extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
+ rtx[]);
+extern void ix86_expand_copysign (rtx []);
+extern void ix86_split_copysign_const (rtx []);
+extern void ix86_split_copysign_var (rtx []);
+extern int ix86_unary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
+extern int ix86_match_ccmode (rtx, enum machine_mode);
+extern rtx ix86_expand_compare (enum rtx_code, rtx *, rtx *);
+extern int ix86_use_fcomi_compare (enum rtx_code);
+extern void ix86_expand_branch (enum rtx_code, rtx);
+extern int ix86_expand_setcc (enum rtx_code, rtx);
+extern int ix86_expand_int_movcc (rtx[]);
+extern int ix86_expand_fp_movcc (rtx[]);
+extern bool ix86_expand_fp_vcond (rtx[]);
+extern bool ix86_expand_int_vcond (rtx[]);
+extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse5_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse5_pack (rtx[]);
+extern int ix86_expand_int_addcc (rtx[]);
+extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
+extern void x86_initialize_trampoline (rtx, rtx, rtx);
+extern rtx ix86_zero_extend_to_Pmode (rtx);
+extern void ix86_split_long_move (rtx[]);
+extern void ix86_split_ashl (rtx *, rtx, enum machine_mode);
+extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
+extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
+extern rtx ix86_find_base_term (rtx);
+extern int ix86_check_movabs (rtx, int);
+
+extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
+extern int ix86_attr_length_immediate_default (rtx, int);
+extern int ix86_attr_length_address_default (rtx);
+extern int ix86_attr_length_vex_default (rtx, int, int);
+
+extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
+
+extern rtx ix86_libcall_value (enum machine_mode);
+extern bool ix86_function_value_regno_p (int);
+extern bool ix86_function_arg_regno_p (int);
+extern int ix86_function_arg_boundary (enum machine_mode, tree);
+extern bool ix86_sol10_return_in_memory (const_tree, const_tree);
+extern rtx ix86_force_to_memory (enum machine_mode, rtx);
+extern void ix86_free_from_memory (enum machine_mode);
+extern int ix86_cfun_abi (void);
+extern int ix86_function_abi (const_tree);
+extern int ix86_function_type_abi (const_tree);
+extern void ix86_call_abi_override (const_tree);
+extern tree ix86_fn_abi_va_list (tree);
+extern tree ix86_canonical_va_list_type (tree);
+extern int ix86_enum_va_list (int, const char **, tree *);
+extern int ix86_reg_parm_stack_space (const_tree);
+
+extern void ix86_split_fp_branch (enum rtx_code code, rtx, rtx,
+ rtx, rtx, rtx, rtx);
+extern bool ix86_hard_regno_mode_ok (int, enum machine_mode);
+extern bool ix86_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern int ix86_register_move_cost (enum machine_mode, enum reg_class,
+ enum reg_class);
+extern int ix86_secondary_memory_needed (enum reg_class, enum reg_class,
+ enum machine_mode, int);
+extern bool ix86_cannot_change_mode_class (enum machine_mode,
+ enum machine_mode, enum reg_class);
+extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
+extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
+extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
+extern int ix86_mode_needed (int, rtx);
+extern void emit_i387_cw_initialization (int);
+extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
+extern void x86_order_regs_for_local_alloc (void);
+extern void x86_function_profiler (FILE *, int);
+extern void x86_emit_floatuns (rtx [2]);
+extern void ix86_emit_fp_unordered_jump (rtx);
+
+extern void ix86_emit_i387_log1p (rtx, rtx);
+extern void ix86_emit_swdivsf (rtx, rtx, rtx, enum machine_mode);
+extern void ix86_emit_swsqrtsf (rtx, rtx, enum machine_mode, bool);
+
+extern enum rtx_code ix86_reverse_condition (enum rtx_code, enum machine_mode);
+
+extern void ix86_expand_lround (rtx, rtx);
+extern void ix86_expand_lfloorceil (rtx, rtx, bool);
+extern void ix86_expand_rint (rtx, rtx);
+extern void ix86_expand_floorceil (rtx, rtx, bool);
+extern void ix86_expand_floorceildf_32 (rtx, rtx, bool);
+extern void ix86_expand_round (rtx, rtx);
+extern void ix86_expand_rounddf_32 (rtx, rtx);
+extern void ix86_expand_trunc (rtx, rtx);
+extern void ix86_expand_truncdf_32 (rtx, rtx);
+
+#ifdef TREE_CODE
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
+extern rtx function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
+extern void function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, int);
+#endif
+
+#endif
+
+#ifdef TREE_CODE
+extern int ix86_return_pops_args (tree, tree, int);
+
+extern int ix86_data_alignment (tree, int);
+extern unsigned int ix86_local_alignment (tree, enum machine_mode,
+ unsigned int);
+extern unsigned int ix86_minimum_alignment (tree, enum machine_mode,
+ unsigned int);
+extern int ix86_constant_alignment (tree, int);
+extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *);
+extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *);
+extern int x86_field_alignment (tree, int);
+extern tree ix86_valid_target_attribute_tree (tree);
+#endif
+
+extern rtx ix86_tls_get_addr (void);
+extern rtx ix86_tls_module_base (void);
+
+extern void ix86_expand_vector_init (bool, rtx, rtx);
+extern void ix86_expand_vector_set (bool, rtx, rtx, int);
+extern void ix86_expand_vector_extract (bool, rtx, rtx, int);
+extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx);
+
+extern bool ix86_sse5_valid_op_p (rtx [], rtx, int, bool, int, bool);
+extern void ix86_expand_sse5_multiple_memory (rtx [], int, enum machine_mode);
+
+/* In i386-c.c */
+extern void ix86_target_macros (void);
+extern void ix86_register_pragmas (void);
+
+/* In winnt.c */
+extern void i386_pe_unique_section (tree, int);
+extern void i386_pe_declare_function_type (FILE *, const char *, int);
+extern void i386_pe_record_external_function (tree, const char *);
+extern void i386_pe_maybe_record_exported_symbol (tree, const char *, int);
+extern void i386_pe_encode_section_info (tree, rtx, int);
+extern bool i386_pe_binds_local_p (const_tree);
+extern const char *i386_pe_strip_name_encoding_full (const char *);
+extern bool i386_pe_valid_dllimport_attribute_p (const_tree);
+extern unsigned int i386_pe_section_type_flags (tree, const char *, int);
+extern void i386_pe_asm_named_section (const char *, unsigned int, tree);
+extern void i386_pe_asm_output_aligned_decl_common (FILE *, tree,
+ const char *,
+ HOST_WIDE_INT,
+ HOST_WIDE_INT);
+extern void i386_pe_file_end (void);
+extern tree i386_pe_mangle_decl_assembler_name (tree, tree);
+
+/* In winnt-cxx.c and winnt-stubs.c */
+extern void i386_pe_adjust_class_at_definition (tree);
+extern bool i386_pe_type_dllimport_p (tree);
+extern bool i386_pe_type_dllexport_p (tree);
+
+extern rtx maybe_get_pool_constant (rtx);
+
+extern char internal_label_prefix[16];
+extern int internal_label_prefix_len;
+
+enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS };
+struct ix86_address
+{
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
+};
+
+extern int ix86_decompose_address (rtx, struct ix86_address *);
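[Editorial note: a sketch of assumed usage. Only the struct and the prototype above are from the header; the caller below, including the variable names, is hypothetical.]

  /* Decompose a MEM operand's address into its parts (sketch).  */
  struct ix86_address parts;
  rtx addr = XEXP (mem, 0);     /* the address inside the MEM rtx */

  if (ix86_decompose_address (addr, &parts))
    {
      /* For an address like 8(%ebx,%esi,4): parts.base = %ebx,
         parts.index = %esi, parts.scale = 4, parts.disp = (const_int 8),
         parts.seg = SEG_DEFAULT.  */
    }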
+extern int memory_address_length (rtx addr);
+extern void x86_output_aligned_bss (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT, int);
+extern void x86_elf_aligned_common (FILE *, const char *,
+ unsigned HOST_WIDE_INT, int);
+
+#ifdef RTX_CODE
+extern void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
+ enum rtx_code *, enum rtx_code *);
+extern enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
+extern rtx construct_plt_address (rtx);
+#endif
+extern int asm_preferred_eh_data_format (int, int);
+
+#ifdef HAVE_ATTR_cpu
+extern enum attr_cpu ix86_schedule;
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/i386.c b/gcc-4.4.0/gcc/config/i386/i386.c
new file mode 100644
index 000000000..d42f6c345
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386.c
@@ -0,0 +1,30118 @@
+/* Subroutines used for code generation on IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "real.h"
+#include "insn-config.h"
+#include "conditions.h"
+#include "output.h"
+#include "insn-codes.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "c-common.h"
+#include "except.h"
+#include "function.h"
+#include "recog.h"
+#include "expr.h"
+#include "optabs.h"
+#include "toplev.h"
+#include "basic-block.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "cgraph.h"
+#include "gimple.h"
+#include "dwarf2.h"
+#include "df.h"
+#include "tm-constrs.h"
+#include "params.h"
+#include "cselib.h"
+
+static int x86_builtin_vectorization_cost (bool);
+static rtx legitimize_dllimport_symbol (rtx, bool);
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT (0x1000)
+#endif
+
+/* Return the index of the given mode in the multiply and divide cost tables. */
+#define MODE_INDEX(mode) \
+ ((mode) == QImode ? 0 \
+ : (mode) == HImode ? 1 \
+ : (mode) == SImode ? 2 \
+ : (mode) == DImode ? 3 \
+ : 4)
+
+/* Processor costs (relative to an add) */
+/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
+#define COSTS_N_BYTES(N) ((N) * 2)
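[Editorial note: two small usage sketches. The field name `mult_init` is read off the initializers below and may be spelled differently in i386.h.]

  /* Cost of starting an SImode multiply under the active tuning;
     MODE_INDEX (SImode) evaluates to 2.  */
  int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

  /* Arithmetic check: with COSTS_N_INSNS (N) == (N) * 4,
     COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. when tuning
     for size a 2-byte add is priced like one average instruction.  */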
+
+#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
+
+const
+struct processor_costs ix86_size_cost = {/* costs for tuning for size */
+ COSTS_N_BYTES (2), /* cost of an add instruction */
+ COSTS_N_BYTES (3), /* cost of a lea instruction */
+ COSTS_N_BYTES (2), /* variable shift costs */
+ COSTS_N_BYTES (3), /* constant shift costs */
+ {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* cost of movsx */
+ COSTS_N_BYTES (3), /* cost of movzx */
+ 0, /* "large" insn */
+ 2, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, /* cost of moving SSE register */
+ {3, 3, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {3, 3, 3}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_BYTES (2), /* cost of FMUL instruction. */
+ COSTS_N_BYTES (2), /* cost of FDIV instruction. */
+ COSTS_N_BYTES (2), /* cost of FABS instruction. */
+ COSTS_N_BYTES (2), /* cost of FCHS instruction. */
+ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 1, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Processor costs (relative to an add) */
+static const
+struct processor_costs i386_cost = { /* 386 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (1), /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (27), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (88), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (22), /* cost of FABS instruction. */
+ COSTS_N_INSNS (24), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs i486_cost = { /* 486 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
+ 1, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 4, /* size of l1 cache. 486 has an 8kB cache
+ shared between code and data, so 4kB is
+ not really precise. */
+ 4, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (16), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (73), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
+ {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs pentium_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 8, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs pentiumpro_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache */
+ 32, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we
+ ensure the alignment). For small blocks an inline loop is still a noticeable
+ win; for bigger blocks either rep movsl or rep movsb is the way to go. Rep
+ movsb apparently has a more expensive startup time in the CPU, but after 4K
+ the difference is down in the noise. */
+ {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
+ {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{rep_prefix_4_byte, {{1024, unrolled_loop},
+ {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs geode_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (2), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (7), /* SI */
+ COSTS_N_INSNS (7), /* DI */
+ COSTS_N_INSNS (7)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (39), /* SI */
+ COSTS_N_INSNS (39), /* DI */
+ COSTS_N_INSNS (39)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 1, /* cost for loading QImode using movzbl */
+ {1, 1, 1}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {1, 1, 1}, /* cost of storing integer registers */
+ 1, /* cost of reg,reg fld/fst */
+ {1, 1, 1}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 6, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+
+ 1, /* cost of moving MMX register */
+ {1, 1}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {1, 1}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 1, /* cost of moving SSE register */
+ {1, 1, 1}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {1, 1, 1}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 1, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 128, /* size of l2 cache. */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (11), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (47), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs k6_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (2), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 6, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 32, /* size of l2 cache. Some models
+ have integrated l2 cache, but
+ optimizing for k6 is not important
+ enough to worry about that. */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs athlon_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (24), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ /* For some reason, Athlon deals better with the REP prefix (relative to
+ loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
+ and 128 bytes for memset. */
+ {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs k8_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably unwise to leave the
+ number of prefetches unlimited, since their execution also takes some
+ time). */
+ 100, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ /* K8 has an optimized REP instruction for medium-sized blocks, but for very
+ small blocks it is better to use a loop. For large blocks, a libcall can do
+ nontemporal accesses and beat inline code considerably; see the reading
+ note after this table. */
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 5, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 3, /* vec_unalign_load_cost. */
+ 3, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
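[Editorial note, the reading guide promised above: a sketch of the assumed shape behind the {max, alg} initializers. The real definition lives in i386.h and may differ in detail; the member names here are illustrative.]

  struct stringop_algs
  {
    enum stringop_alg unknown_size;   /* algorithm when the block size is
                                         not known at compile time */
    struct stringop_strategy
    {
      int max;                        /* largest size handled; -1 = the rest */
      enum stringop_alg alg;
    } size[4];
  };

Under that reading, the first stringop_algs of each {memcpy, memset} pair applies to 32-bit code and the second to 64-bit, so 64-bit K8 memcpy uses a loop up to 16 bytes, rep_prefix_8_byte (rep movsq) up to 8192 bytes, and a libcall beyond that.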
+
+struct processor_costs amdfam10_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3 (1/1 1/1)
+ MOVD reg32, xmmreg Double FADD 3 (1/1 1/1) */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably unwise to leave the
+ number of prefetches unlimited, since their execution also takes some
+ time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+
+ /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline code considerably. */
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs pentium4_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 12, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 10, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (43), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
+ {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
+ {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs nocona_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 3, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 6, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {12, 12, 12}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 8, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 1024, /* size of l2 cache. */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
+ {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
+ {100000, unrolled_loop}, {-1, libcall}}}},
+ {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
+ {-1, libcall}}},
+ {libcall, {{24, loop}, {64, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs core2_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (22), /* HI */
+ COSTS_N_INSNS (22), /* SI */
+ COSTS_N_INSNS (22), /* DI */
+ COSTS_N_INSNS (22)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 16, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {6, 6, 6}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {6, 6}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {6, 6, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 128, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (32), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {15, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{24, loop}, {32, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Generic64 should produce code tuned for Nocona and K8. */
+static const
+struct processor_costs generic64_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ /* On all chips taken into consideration, lea takes 2 cycles or more. With
+ this cost, however, our current implementation of synth_mult results in
+ the use of unnecessary temporary registers, causing regressions on several
+ SPECfp benchmarks. */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
+ value is increased to the perhaps more appropriate value of 5. */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {DUMMY_STRINGOP_ALGS,
+ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {DUMMY_STRINGOP_ALGS,
+ {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
+static const
+struct processor_costs generic32_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
+ DUMMY_STRINGOP_ALGS},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+const struct processor_costs *ix86_cost = &pentium_cost;
+
+/* Processor feature/optimization bitmasks. */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_CORE2 (1<<PROCESSOR_CORE2)
+
+#define m_GEODE (1<<PROCESSOR_GEODE)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_K6_GEODE (m_K6 | m_GEODE)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
+#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
+
+#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
+#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
+
+/* Generic instruction choice should be a common subset of supported CPUs
+ (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
+#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
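+
+/* For example, a tuning in the tables below applies to the processor being
+ tuned for when its bit is set in the entry, i.e.
+
+ (initial_ix86_tune_features[X86_TUNE_SCHEDULE] & (1u << ix86_tune)) != 0
+
+ which is exactly how override_options fills in ix86_tune_features. */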
+
+/* Feature tests against the various tunings. */
+unsigned char ix86_tune_features[X86_TUNE_LAST];
+
+/* Feature tests against the various tunings used to create ix86_tune_features
+ based on the processor mask. */
+static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
+ /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
+ negatively, so enabling it for Generic64 seems like a good code-size
+ tradeoff. We can't enable it for 32-bit generic because it does not
+ work well with PPro-based chips. */
+ m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
+
+ /* X86_TUNE_PUSH_MEMORY */
+ m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
+ | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_ZERO_EXTEND_WITH_AND */
+ m_486 | m_PENT,
+
+ /* X86_TUNE_UNROLL_STRLEN */
+ m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_DEEP_BRANCH_PREDICTION */
+ m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
+
+ /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
+ on simulation results, but after P4 was made no performance benefit
+ was observed with branch hints; they also increase code size.
+ As a result, icc never generates branch hints. */
+ 0,
+
+ /* X86_TUNE_DOUBLE_WITH_ADD */
+ ~m_386,
+
+ /* X86_TUNE_USE_SAHF */
+ m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
+ | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_MOVX: Enable zero-extension of integer registers to avoid
+ partial dependencies. */
+ m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
+ | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
+
+ /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
+ register stalls on the Generic32 compilation setting as well. However,
+ in the current implementation partial register stalls are not eliminated
+ very well: they can be introduced via subregs synthesized by combine
+ and can happen in caller/callee saving sequences. Because this option
+ pays back little on PPro-based chips and conflicts with the partial
+ register dependencies used by Athlon/P4-based chips, it is better to
+ leave it off for generic32 for now. */
+ m_PPRO,
+
+ /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
+ m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_USE_HIMODE_FIOP */
+ m_386 | m_486 | m_K6_GEODE,
+
+ /* X86_TUNE_USE_SIMODE_FIOP */
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
+
+ /* X86_TUNE_USE_MOV0 */
+ m_K6,
+
+ /* X86_TUNE_USE_CLTD */
+ ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
+
+ /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
+ m_PENT4,
+
+ /* X86_TUNE_SPLIT_LONG_MOVES */
+ m_PPRO,
+
+ /* X86_TUNE_READ_MODIFY_WRITE */
+ ~m_PENT,
+
+ /* X86_TUNE_READ_MODIFY */
+ ~(m_PENT | m_PPRO),
+
+ /* X86_TUNE_PROMOTE_QIMODE */
+ m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
+ | m_GENERIC /* | m_PENT4 ? */,
+
+ /* X86_TUNE_FAST_PREFIX */
+ ~(m_PENT | m_486 | m_386),
+
+ /* X86_TUNE_SINGLE_STRINGOP */
+ m_386 | m_PENT4 | m_NOCONA,
+
+ /* X86_TUNE_QIMODE_MATH */
+ ~0,
+
+ /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
+ register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
+ might be considered for Generic32 if our scheme for avoiding partial
+ stalls were more effective. */
+ ~m_PPRO,
+
+ /* X86_TUNE_PROMOTE_QI_REGS */
+ 0,
+
+ /* X86_TUNE_PROMOTE_HI_REGS */
+ m_PPRO,
+
+ /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
+ m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_ADD_ESP_8 */
+ m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
+ | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_SUB_ESP_4 */
+ m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_SUB_ESP_8 */
+ m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
+ | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
+ for DFmode copies. */
+ ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+ | m_GENERIC | m_GEODE),
+
+ /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
+ m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
+ conflict here between PPro/Pentium4-based chips that treat 128-bit
+ SSE registers as single units and K8-based chips that divide SSE
+ registers into two 64-bit halves. This knob promotes all store
+ destinations to 128 bits to allow register renaming on 128-bit SSE
+ units, but usually results in one extra micro-op on 64-bit SSE units.
+ Experimental results show that disabling this option on P4 brings over
+ a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
+ regression that can be partly masked by careful scheduling of moves. */
+ m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
+
+ /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
+ m_AMDFAM10,
+
+ /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
+ are resolved on SSE register parts instead of whole registers, so we may
+ maintain just the lower part of scalar values in the proper format,
+ leaving the upper part undefined. */
+ m_ATHLON_K8,
+
+ /* X86_TUNE_SSE_TYPELESS_STORES */
+ m_AMD_MULTIPLE,
+
+ /* X86_TUNE_SSE_LOAD0_BY_PXOR */
+ m_PPRO | m_PENT4 | m_NOCONA,
+
+ /* X86_TUNE_MEMORY_MISMATCH_STALL */
+ m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_PROLOGUE_USING_MOVE */
+ m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_EPILOGUE_USING_MOVE */
+ m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_SHIFT1 */
+ ~m_486,
+
+ /* X86_TUNE_USE_FFREEP */
+ m_AMD_MULTIPLE,
+
+ /* X86_TUNE_INTER_UNIT_MOVES */
+ ~(m_AMD_MULTIPLE | m_GENERIC),
+
+ /* X86_TUNE_INTER_UNIT_CONVERSIONS */
+ ~(m_AMDFAM10),
+
+ /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
+ than 4 branch instructions in a 16-byte window. */
+ m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_SCHEDULE */
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_USE_BT */
+ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_USE_INCDEC */
+ ~(m_PENT4 | m_NOCONA | m_GENERIC),
+
+ /* X86_TUNE_PAD_RETURNS */
+ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_EXT_80387_CONSTANTS */
+ m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
+
+ /* X86_TUNE_SHORTEN_X87_SSE */
+ ~m_K8,
+
+ /* X86_TUNE_AVOID_VECTOR_DECODE */
+ m_K8 | m_GENERIC64,
+
+ /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
+ HImode and SImode multiplies, but the 386 and 486 do HImode multiplies
+ faster. */
+ ~(m_386 | m_486),
+
+ /* X86_TUNE_SLOW_IMUL_IMM32_MEM: IMUL of a 32-bit constant and a memory
+ operand takes the vector path on AMD machines. */
+ m_K8 | m_GENERIC64 | m_AMDFAM10,
+
+ /* X86_TUNE_SLOW_IMUL_IMM8: IMUL of an 8-bit constant takes the vector
+ path on AMD machines. */
+ m_K8 | m_GENERIC64 | m_AMDFAM10,
+
+ /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
+ than via a MOV. */
+ m_PENT,
+
+ /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
+ but one byte longer. */
+ m_PENT,
+
+ /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
+ memory operand that cannot be represented using a modRM byte. The XOR
+ replacement is long decoded, so this split helps here as well. */
+ m_K6,
+
+ /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
+ from FP to FP. */
+ m_AMDFAM10 | m_GENERIC,
+
+ /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
+ from integer to FP. */
+ m_AMDFAM10,
+
+ /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
+ with a subsequent conditional jump instruction into a single
+ compare-and-branch uop. */
+ m_CORE2,
+};
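+
+/* The entries above are read through the TARGET_* tuning macros in i386.h;
+ a sketch of the existing pattern there:
+ #define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE] */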
+
+/* Feature tests against the various architecture variations. */
+unsigned char ix86_arch_features[X86_ARCH_LAST];
+
+/* Feature tests against the various architecture variations, used to create
+ ix86_arch_features based on the processor mask. */
+static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
+ /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
+ ~(m_386 | m_486 | m_PENT | m_K6),
+
+ /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
+ ~m_386,
+
+ /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
+ ~(m_386 | m_486),
+
+ /* X86_ARCH_XADD: Exchange and add was added for 80486. */
+ ~m_386,
+
+ /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
+ ~m_386,
+};
+
+static const unsigned int x86_accumulate_outgoing_args
+ = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+
+static const unsigned int x86_arch_always_fancy_math_387
+ = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
+ | m_NOCONA | m_CORE2 | m_GENERIC;
+
+static enum stringop_alg stringop_alg = no_stringop;
+
+/* In case the average insn count for a single function invocation is
+ lower than this constant, emit fast (but longer) prologue and
+ epilogue code. */
+#define FAST_PROLOGUE_INSN_COUNT 20
+
+/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
+static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
+static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
+static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
+
+/* Array of the smallest class containing reg number REGNO, indexed by
+ REGNO. Used by REGNO_REG_CLASS in i386.h. */
+
+enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
+{
+ /* ax, dx, cx, bx */
+ AREG, DREG, CREG, BREG,
+ /* si, di, bp, sp */
+ SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
+ /* FP registers */
+ FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
+ FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
+ /* arg pointer */
+ NON_Q_REGS,
+ /* flags, fpsr, fpcr, frame */
+ NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
+ /* SSE registers */
+ SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+ /* MMX registers */
+ MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
+ MMX_REGS, MMX_REGS,
+ /* REX registers */
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
+ /* SSE REX registers */
+ SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
+ SSE_REGS, SSE_REGS,
+};
+
+/* The "default" register map used in 32bit mode. */
+
+int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
+ 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
+
+/* The "default" register map used in 64bit mode. */
+
+int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
+ 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
+ -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
+ 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
+ 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
+ 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
+ 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
+};
+
+/* Define the register numbers to be used in Dwarf debugging information.
+ The SVR4 reference port C compiler uses the following register numbers
+ in its Dwarf output code:
+ 0 for %eax (gcc regno = 0)
+ 1 for %ecx (gcc regno = 2)
+ 2 for %edx (gcc regno = 1)
+ 3 for %ebx (gcc regno = 3)
+ 4 for %esp (gcc regno = 7)
+ 5 for %ebp (gcc regno = 6)
+ 6 for %esi (gcc regno = 4)
+ 7 for %edi (gcc regno = 5)
+ The following three DWARF register numbers are never generated by
+ the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
+ believes these numbers have these meanings.
+ 8 for %eip (no gcc equivalent)
+ 9 for %eflags (gcc regno = 17)
+ 10 for %trapno (no gcc equivalent)
+ It is not at all clear how we should number the FP stack registers
+ for the x86 architecture. If the version of SDB on x86/svr4 were
+ a bit less brain dead with respect to floating-point then we would
+ have a precedent to follow with respect to DWARF register numbers
+ for x86 FP registers, but the SDB on x86/svr4 is so completely
+ broken with respect to FP registers that it is hardly worth thinking
+ of it as something to strive for compatibility with.
+ The version of x86/svr4 SDB I have at the moment does (partially)
+ seem to believe that DWARF register number 11 is associated with
+ the x86 register %st(0), but that's about all. Higher DWARF
+ register numbers don't seem to be associated with anything in
+ particular, and even for DWARF regno 11, SDB only seems to under-
+ stand that it should say that a variable lives in %st(0) (when
+ asked via an `=' command) if we said it was in DWARF regno 11,
+ but SDB still prints garbage when asked for the value of the
+ variable in question (via a `/' command).
+ (Also note that the labels SDB prints for various FP stack regs
+ when doing an `x' command are all wrong.)
+ Note that these problems generally don't affect the native SVR4
+ C compiler because it doesn't allow the use of -O with -g and
+ because when it is *not* optimizing, it allocates a memory
+ location for each floating-point variable, and the memory
+ location is what gets described in the DWARF AT_location
+ attribute for the variable in question.
+ Regardless of the severe mental illness of the x86/svr4 SDB, we
+ do something sensible here and we use the following DWARF
+ register numbers. Note that these are all stack-top-relative
+ numbers.
+ 11 for %st(0) (gcc regno = 8)
+ 12 for %st(1) (gcc regno = 9)
+ 13 for %st(2) (gcc regno = 10)
+ 14 for %st(3) (gcc regno = 11)
+ 15 for %st(4) (gcc regno = 12)
+ 16 for %st(5) (gcc regno = 13)
+ 17 for %st(6) (gcc regno = 14)
+ 18 for %st(7) (gcc regno = 15)
+*/
+int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
+{
+ 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
+ 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
+ -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
+ 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
+ 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
+};
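+
+/* Worked example: %esp is gcc regno 7, so svr4_dbx_register_map[7] == 4,
+ matching the SVR4 numbering above, and the FP stack regs (gcc regnos
+ 8-15) map to DWARF regnos 11-18. */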
+
+/* Test and compare insns in i386.md store the information needed to
+ generate branch and scc insns here. */
+
+rtx ix86_compare_op0 = NULL_RTX;
+rtx ix86_compare_op1 = NULL_RTX;
+rtx ix86_compare_emitted = NULL_RTX;
+
+/* Define parameter passing and return registers. */
+
+static int const x86_64_int_parameter_registers[6] =
+{
+ DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
+};
+
+static int const x86_64_ms_abi_int_parameter_registers[4] =
+{
+ CX_REG, DX_REG, R8_REG, R9_REG
+};
+
+static int const x86_64_int_return_registers[4] =
+{
+ AX_REG, DX_REG, DI_REG, SI_REG
+};
+
+/* Define the structure for the machine field in struct function. */
+
+struct stack_local_entry GTY(())
+{
+ unsigned short mode;
+ unsigned short n;
+ rtx rtl;
+ struct stack_local_entry *next;
+};
+
+/* Structure describing stack frame layout.
+ Stack grows downward:
+
+ [arguments]
+ <- ARG_POINTER
+ saved pc
+
+ saved frame pointer if frame_pointer_needed
+ <- HARD_FRAME_POINTER
+ [saved regs]
+
+ [padding0]
+
+ [saved SSE regs]
+
+ [padding1] \
+ )
+ [va_arg registers] (
+ > to_allocate <- FRAME_POINTER
+ [frame] (
+ )
+ [padding2] /
+ */
+struct ix86_frame
+{
+ int padding0;
+ int nsseregs;
+ int nregs;
+ int padding1;
+ int va_arg_size;
+ HOST_WIDE_INT frame;
+ int padding2;
+ int outgoing_arguments_size;
+ int red_zone_size;
+
+ HOST_WIDE_INT to_allocate;
+ /* The offsets relative to ARG_POINTER. */
+ HOST_WIDE_INT frame_pointer_offset;
+ HOST_WIDE_INT hard_frame_pointer_offset;
+ HOST_WIDE_INT stack_pointer_offset;
+
+ /* When save_regs_using_mov is set, emit the prologue using move
+ instead of push instructions. */
+ bool save_regs_using_mov;
+};
+
+/* Code model option. */
+enum cmodel ix86_cmodel;
+/* Asm dialect. */
+enum asm_dialect ix86_asm_dialect = ASM_ATT;
+/* TLS dialects. */
+enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
+
+/* Which unit we are generating floating point math for. */
+enum fpmath_unit ix86_fpmath;
+
+/* Which cpu are we scheduling for. */
+enum attr_cpu ix86_schedule;
+
+/* Which cpu are we optimizing for. */
+enum processor_type ix86_tune;
+
+/* Which instruction set architecture to use. */
+enum processor_type ix86_arch;
+
+/* True if the SSE prefetch instruction is not a NOP. */
+int x86_prefetch_sse;
+
+/* ix86_regparm_string as a number */
+static int ix86_regparm;
+
+/* -mstackrealign option */
+extern int ix86_force_align_arg_pointer;
+static const char ix86_force_align_arg_pointer_string[]
+ = "force_align_arg_pointer";
+
+static rtx (*ix86_gen_leave) (void);
+static rtx (*ix86_gen_pop1) (rtx);
+static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
+static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
+static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
+static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
+
+/* Preferred alignment for stack boundary in bits. */
+unsigned int ix86_preferred_stack_boundary;
+
+/* Alignment for incoming stack boundary in bits specified at
+ command line. */
+static unsigned int ix86_user_incoming_stack_boundary;
+
+/* Default alignment for incoming stack boundary in bits. */
+static unsigned int ix86_default_incoming_stack_boundary;
+
+/* Alignment for incoming stack boundary in bits. */
+unsigned int ix86_incoming_stack_boundary;
+
+/* Values 1-5: see jump.c */
+int ix86_branch_cost;
+
+/* Calling abi specific va_list type nodes. */
+static GTY(()) tree sysv_va_list_type_node;
+static GTY(()) tree ms_va_list_type_node;
+
+/* Variables which are this size or smaller are put in the data/bss
+ or ldata/lbss sections. */
+
+int ix86_section_threshold = 65536;
+
+/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
+char internal_label_prefix[16];
+int internal_label_prefix_len;
+
+/* Fence to use after loop using movnt. */
+tree x86_mfence;
+
+/* Register classes used for passing a given 64-bit part of an argument.
+ These represent the classes documented by the psABI, with the exception
+ of the SSESF and SSEDF classes, which are basically the SSE class; gcc
+ just uses an SFmode or DFmode move instead of DImode to avoid
+ reformatting penalties.
+
+ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ whenever possible (the upper half contains padding). */
+enum x86_64_reg_class
+ {
+ X86_64_NO_CLASS,
+ X86_64_INTEGER_CLASS,
+ X86_64_INTEGERSI_CLASS,
+ X86_64_SSE_CLASS,
+ X86_64_SSESF_CLASS,
+ X86_64_SSEDF_CLASS,
+ X86_64_SSEUP_CLASS,
+ X86_64_X87_CLASS,
+ X86_64_X87UP_CLASS,
+ X86_64_COMPLEX_X87_CLASS,
+ X86_64_MEMORY_CLASS
+ };
+
+#define MAX_CLASSES 4
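+
+/* Classification sketch (per the psABI rules these classes encode): a
+ struct { double d; int i; } argument occupies two eightbytes; the first
+ is classified X86_64_SSEDF_CLASS and the second X86_64_INTEGERSI_CLASS,
+ so the struct is passed in one SSE and one integer register. */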
+
+/* Table of constants used by fldpi, fldln2, etc.... */
+static REAL_VALUE_TYPE ext_80387_constants_table [5];
+static bool ext_80387_constants_init = 0;
+
+
+static struct machine_function * ix86_init_machine_status (void);
+static rtx ix86_function_value (const_tree, const_tree, bool);
+static int ix86_function_regparm (const_tree, const_tree);
+static void ix86_compute_frame_layout (struct ix86_frame *);
+static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
+ rtx, rtx, int);
+static void ix86_add_new_builtins (int);
+
+enum ix86_function_specific_strings
+{
+ IX86_FUNCTION_SPECIFIC_ARCH,
+ IX86_FUNCTION_SPECIFIC_TUNE,
+ IX86_FUNCTION_SPECIFIC_FPMATH,
+ IX86_FUNCTION_SPECIFIC_MAX
+};
+
+static char *ix86_target_string (int, int, const char *, const char *,
+ const char *, bool);
+static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
+static void ix86_function_specific_save (struct cl_target_option *);
+static void ix86_function_specific_restore (struct cl_target_option *);
+static void ix86_function_specific_print (FILE *, int,
+ struct cl_target_option *);
+static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
+static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
+static bool ix86_can_inline_p (tree, tree);
+static void ix86_set_current_function (tree);
+
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#ifndef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+#endif
+
+/* Whether -mtune= or -march= were specified */
+static int ix86_tune_defaulted;
+static int ix86_arch_specified;
+
+/* Bit flags that specify the ISA we are compiling for. */
+int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
+
+/* A mask of ix86_isa_flags that includes bit X if X
+ was set or cleared on the command line. */
+static int ix86_isa_flags_explicit;
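+
+/* For example, given "-mno-sse -march=core2", ix86_handle_option records
+ the OPTION_MASK_ISA_SSE_UNSET bits (which include SSE) in
+ ix86_isa_flags_explicit, and override_options below only ORs in
+ -march implied ISAs whose explicit bit is unset, so SSE stays
+ disabled. */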
+
+/* Define a set of ISAs which are available when a given ISA is
+ enabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
+#define OPTION_MASK_ISA_3DNOW_SET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
+
+#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
+#define OPTION_MASK_ISA_SSE2_SET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
+#define OPTION_MASK_ISA_SSE3_SET \
+ (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SSSE3_SET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE4_1_SET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
+#define OPTION_MASK_ISA_SSE4_2_SET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+#define OPTION_MASK_ISA_AVX_SET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
+#define OPTION_MASK_ISA_FMA_SET \
+ (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
+ as -msse4.2. */
+#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
+
+#define OPTION_MASK_ISA_SSE4A_SET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE5_SET \
+ (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
+
+/* AES and PCLMUL need SSE2 because they use xmm registers. */
+#define OPTION_MASK_ISA_AES_SET \
+ (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_PCLMUL_SET \
+ (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
+
+#define OPTION_MASK_ISA_ABM_SET \
+ (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
+#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
+#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
+#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
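+
+/* E.g. OPTION_MASK_ISA_SSE4_1_SET expands transitively to
+ SSE4_1 | SSSE3 | SSE3 | SSE2 | SSE, so -msse4.1 enables every ISA it
+ depends on in one step. */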
+
+/* Define a set of ISAs which aren't available when a given ISA is
+ disabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_UNSET \
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_3DNOW_UNSET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
+#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
+
+#define OPTION_MASK_ISA_SSE_UNSET \
+ (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
+#define OPTION_MASK_ISA_SSE2_UNSET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
+#define OPTION_MASK_ISA_SSE3_UNSET \
+ (OPTION_MASK_ISA_SSE3 \
+ | OPTION_MASK_ISA_SSSE3_UNSET \
+ | OPTION_MASK_ISA_SSE4A_UNSET)
+#define OPTION_MASK_ISA_SSSE3_UNSET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
+#define OPTION_MASK_ISA_SSE4_1_UNSET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
+#define OPTION_MASK_ISA_SSE4_2_UNSET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
+#define OPTION_MASK_ISA_AVX_UNSET \
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
+#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
+ as -mno-sse4.1. */
+#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
+
+#define OPTION_MASK_ISA_SSE4A_UNSET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
+#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
+#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
+#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
+#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
+#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
+#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
+#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
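+
+/* Conversely, OPTION_MASK_ISA_SSE3_UNSET pulls in both SSSE3_UNSET and
+ SSE4A_UNSET, so -mno-sse3 also disables SSSE3, SSE4.1/4.2, AVX, FMA,
+ SSE4A and SSE5: everything that requires SSE3. */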
+
+/* Vectorization library interface and handlers. */
+tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
+static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
+
+/* Processor target table, indexed by processor number */
+struct ptt
+{
+ const struct processor_costs *cost; /* Processor costs */
+ const int align_loop; /* Default alignments. */
+ const int align_loop_max_skip;
+ const int align_jump;
+ const int align_jump_max_skip;
+ const int align_func;
+};
+
+static const struct ptt processor_target_table[PROCESSOR_max] =
+{
+ {&i386_cost, 4, 3, 4, 3, 4},
+ {&i486_cost, 16, 15, 16, 15, 16},
+ {&pentium_cost, 16, 7, 16, 7, 16},
+ {&pentiumpro_cost, 16, 15, 16, 10, 16},
+ {&geode_cost, 0, 0, 0, 0, 0},
+ {&k6_cost, 32, 7, 32, 7, 32},
+ {&athlon_cost, 16, 7, 16, 7, 16},
+ {&pentium4_cost, 0, 0, 0, 0, 0},
+ {&k8_cost, 16, 7, 16, 7, 16},
+ {&nocona_cost, 0, 0, 0, 0, 0},
+ {&core2_cost, 16, 10, 16, 10, 16},
+ {&generic32_cost, 16, 7, 16, 7, 16},
+ {&generic64_cost, 16, 10, 16, 10, 16},
+ {&amdfam10_cost, 32, 24, 32, 7, 32}
+};
+
+static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
+{
+ "generic",
+ "i386",
+ "i486",
+ "pentium",
+ "pentium-mmx",
+ "pentiumpro",
+ "pentium2",
+ "pentium3",
+ "pentium4",
+ "pentium-m",
+ "prescott",
+ "nocona",
+ "core2",
+ "geode",
+ "k6",
+ "k6-2",
+ "k6-3",
+ "athlon",
+ "athlon-4",
+ "k8",
+ "amdfam10"
+};
+
+/* Implement TARGET_HANDLE_OPTION. */
+
+static bool
+ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
+{
+ switch (code)
+ {
+ case OPT_mmmx:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
+ }
+ return true;
+
+ case OPT_m3dnow:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
+ }
+ return true;
+
+ case OPT_m3dnowa:
+ return false;
+
+ case OPT_msse:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
+ }
+ return true;
+
+ case OPT_msse2:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
+ }
+ return true;
+
+ case OPT_msse3:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
+ }
+ return true;
+
+ case OPT_mssse3:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
+ }
+ return true;
+
+ case OPT_msse4_1:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
+ }
+ return true;
+
+ case OPT_msse4_2:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
+ }
+ return true;
+
+ case OPT_mavx:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
+ }
+ return true;
+
+ case OPT_mfma:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
+ }
+ return true;
+
+ case OPT_msse4:
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
+ return true;
+
+ case OPT_mno_sse4:
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
+ return true;
+
+ case OPT_msse4a:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
+ }
+ return true;
+
+ case OPT_msse5:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
+ }
+ return true;
+
+ case OPT_mabm:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
+ }
+ return true;
+
+ case OPT_mpopcnt:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
+ }
+ return true;
+
+ case OPT_msahf:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
+ }
+ return true;
+
+ case OPT_mcx16:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
+ }
+ return true;
+
+ case OPT_maes:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
+ }
+ return true;
+
+ case OPT_mpclmul:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
+ }
+ return true;
+
+ default:
+ return true;
+ }
+}
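+
+/* All of the cases above follow one pattern: "-mfoo" ORs the *_SET mask
+ into both ix86_isa_flags and ix86_isa_flags_explicit, while "-mno-foo"
+ clears the *_UNSET mask from ix86_isa_flags and records it as explicit,
+ so later defaulting in override_options never overrides a user choice. */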
+
+/* Return a string that documents the current -m options. The caller is
+ responsible for freeing the string. */
+
+static char *
+ix86_target_string (int isa, int flags, const char *arch, const char *tune,
+ const char *fpmath, bool add_nl_p)
+{
+ struct ix86_target_opts
+ {
+ const char *option; /* option string */
+ int mask; /* isa mask options */
+ };
+
+ /* This table is ordered so that options like -msse5 or -msse4.2 that
+ imply preceding options are matched first. */
+ static struct ix86_target_opts isa_opts[] =
+ {
+ { "-m64", OPTION_MASK_ISA_64BIT },
+ { "-msse5", OPTION_MASK_ISA_SSE5 },
+ { "-msse4a", OPTION_MASK_ISA_SSE4A },
+ { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
+ { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
+ { "-mssse3", OPTION_MASK_ISA_SSSE3 },
+ { "-msse3", OPTION_MASK_ISA_SSE3 },
+ { "-msse2", OPTION_MASK_ISA_SSE2 },
+ { "-msse", OPTION_MASK_ISA_SSE },
+ { "-m3dnow", OPTION_MASK_ISA_3DNOW },
+ { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
+ { "-mmmx", OPTION_MASK_ISA_MMX },
+ { "-mabm", OPTION_MASK_ISA_ABM },
+ { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
+ { "-maes", OPTION_MASK_ISA_AES },
+ { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
+ };
+
+ /* Flag options. */
+ static struct ix86_target_opts flag_opts[] =
+ {
+ { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
+ { "-m80387", MASK_80387 },
+ { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
+ { "-malign-double", MASK_ALIGN_DOUBLE },
+ { "-mcld", MASK_CLD },
+ { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
+ { "-mieee-fp", MASK_IEEE_FP },
+ { "-minline-compares", MASK_INLINE_COMPARES },
+ { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
+ { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
+ { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
+ { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
+ { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
+ { "-mno-fused-madd", MASK_NO_FUSED_MADD },
+ { "-mno-push-args", MASK_NO_PUSH_ARGS },
+ { "-mno-red-zone", MASK_NO_RED_ZONE },
+ { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
+ { "-mrecip", MASK_RECIP },
+ { "-mrtd", MASK_RTD },
+ { "-msseregparm", MASK_SSEREGPARM },
+ { "-mstack-arg-probe", MASK_STACK_PROBE },
+ { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
+ };
+
+ const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
+
+ char isa_other[40];
+ char target_other[40];
+ unsigned num = 0;
+ unsigned i, j;
+ char *ret;
+ char *ptr;
+ size_t len;
+ size_t line_len;
+ size_t sep_len;
+
+ memset (opts, '\0', sizeof (opts));
+
+ /* Add -march= option. */
+ if (arch)
+ {
+ opts[num][0] = "-march=";
+ opts[num++][1] = arch;
+ }
+
+ /* Add -mtune= option. */
+ if (tune)
+ {
+ opts[num][0] = "-mtune=";
+ opts[num++][1] = tune;
+ }
+
+ /* Pick out the ISA options. */
+ for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
+ {
+ if ((isa & isa_opts[i].mask) != 0)
+ {
+ opts[num++][0] = isa_opts[i].option;
+ isa &= ~ isa_opts[i].mask;
+ }
+ }
+
+ if (isa && add_nl_p)
+ {
+ opts[num++][0] = isa_other;
+ sprintf (isa_other, "(other isa: 0x%x)", isa);
+ }
+
+ /* Add flag options. */
+ for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
+ {
+ if ((flags & flag_opts[i].mask) != 0)
+ {
+ opts[num++][0] = flag_opts[i].option;
+ flags &= ~ flag_opts[i].mask;
+ }
+ }
+
+ if (flags && add_nl_p)
+ {
+ opts[num++][0] = target_other;
+ sprintf (target_other, "(other flags: 0x%x)", flags);
+ }
+
+ /* Add -fpmath= option. */
+ if (fpmath)
+ {
+ opts[num][0] = "-mfpmath=";
+ opts[num++][1] = fpmath;
+ }
+
+ /* Any options? */
+ if (num == 0)
+ return NULL;
+
+ gcc_assert (num < ARRAY_SIZE (opts));
+
+ /* Size the string. */
+ len = 0;
+ sep_len = (add_nl_p) ? 3 : 1;
+ for (i = 0; i < num; i++)
+ {
+ len += sep_len;
+ for (j = 0; j < 2; j++)
+ if (opts[i][j])
+ len += strlen (opts[i][j]);
+ }
+
+ /* Build the string. */
+ ret = ptr = (char *) xmalloc (len);
+ line_len = 0;
+
+ for (i = 0; i < num; i++)
+ {
+ size_t len2[2];
+
+ for (j = 0; j < 2; j++)
+ len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
+
+ if (i != 0)
+ {
+ *ptr++ = ' ';
+ line_len++;
+
+ if (add_nl_p && line_len + len2[0] + len2[1] > 70)
+ {
+ *ptr++ = '\\';
+ *ptr++ = '\n';
+ line_len = 0;
+ }
+ }
+
+ for (j = 0; j < 2; j++)
+ if (opts[i][j])
+ {
+ memcpy (ptr, opts[i][j], len2[j]);
+ ptr += len2[j];
+ line_len += len2[j];
+ }
+ }
+
+ *ptr = '\0';
+ gcc_assert (ret + len >= ptr);
+
+ return ret;
+}
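+
+/* Hypothetical example: called with arch == "core2", tune == NULL, an isa
+ mask of just OPTION_MASK_ISA_SSE, flags == MASK_IEEE_FP and fpmath ==
+ "sse", this returns "-march=core2 -msse -mieee-fp -mfpmath=sse". */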
+
+/* Function that is callable from the debugger to print the current
+ options. */
+void
+ix86_debug_options (void)
+{
+ char *opts = ix86_target_string (ix86_isa_flags, target_flags,
+ ix86_arch_string, ix86_tune_string,
+ ix86_fpmath_string, true);
+
+ if (opts)
+ {
+ fprintf (stderr, "%s\n\n", opts);
+ free (opts);
+ }
+ else
+ fprintf (stderr, "<no options>\n\n");
+
+ return;
+}
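+
+/* E.g. from gdb, "call ix86_debug_options ()" dumps the effective -m
+ options of the compiler being debugged to stderr. */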
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+void
+override_options (bool main_args_p)
+{
+ int i;
+ unsigned int ix86_arch_mask, ix86_tune_mask;
+ const char *prefix;
+ const char *suffix;
+ const char *sw;
+
+ /* Comes from final.c -- no real reason to change it. */
+#define MAX_CODE_ALIGN 16
+
+ enum pta_flags
+ {
+ PTA_SSE = 1 << 0,
+ PTA_SSE2 = 1 << 1,
+ PTA_SSE3 = 1 << 2,
+ PTA_MMX = 1 << 3,
+ PTA_PREFETCH_SSE = 1 << 4,
+ PTA_3DNOW = 1 << 5,
+ PTA_3DNOW_A = 1 << 6,
+ PTA_64BIT = 1 << 7,
+ PTA_SSSE3 = 1 << 8,
+ PTA_CX16 = 1 << 9,
+ PTA_POPCNT = 1 << 10,
+ PTA_ABM = 1 << 11,
+ PTA_SSE4A = 1 << 12,
+ PTA_NO_SAHF = 1 << 13,
+ PTA_SSE4_1 = 1 << 14,
+ PTA_SSE4_2 = 1 << 15,
+ PTA_SSE5 = 1 << 16,
+ PTA_AES = 1 << 17,
+ PTA_PCLMUL = 1 << 18,
+ PTA_AVX = 1 << 19,
+ PTA_FMA = 1 << 20
+ };
+
+ static struct pta
+ {
+ const char *const name; /* processor name or nickname. */
+ const enum processor_type processor;
+ const enum attr_cpu schedule;
+ const unsigned /*enum pta_flags*/ flags;
+ }
+ const processor_alias_table[] =
+ {
+ {"i386", PROCESSOR_I386, CPU_NONE, 0},
+ {"i486", PROCESSOR_I486, CPU_NONE, 0},
+ {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
+ {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
+ {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
+ {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
+ {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
+ {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
+ {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"prescott", PROCESSOR_NOCONA, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
+ {"nocona", PROCESSOR_NOCONA, CPU_NONE,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_CX16 | PTA_NO_SAHF},
+ {"core2", PROCESSOR_CORE2, CPU_CORE2,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16},
+ {"geode", PROCESSOR_GEODE, CPU_GEODE,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
+ {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"x86-64", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
+ {"k8", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"k8-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"opteron", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"opteron-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"athlon64", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"athlon-fx", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
+ {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
+ {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
+ 0 /* flags are only used for -march switch. */ },
+ {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
+ PTA_64BIT /* flags are only used for -march switch. */ },
+ };
+
+ int const pta_size = ARRAY_SIZE (processor_alias_table);
+
+ /* Set up prefix/suffix so the error messages refer to either the
+ command-line argument or the attribute(target). */
+ if (main_args_p)
+ {
+ prefix = "-m";
+ suffix = "";
+ sw = "switch";
+ }
+ else
+ {
+ prefix = "option(\"";
+ suffix = "\")";
+ sw = "attribute";
+ }
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+ SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
+ SUBSUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+ /* -fPIC is the default for 64-bit Mach-O (Darwin x86_64). */
+ if (TARGET_MACHO && TARGET_64BIT)
+ flag_pic = 2;
+
+ /* Set the default values for switches whose default depends on TARGET_64BIT
+ in case they weren't overwritten by command line options. */
+ if (TARGET_64BIT)
+ {
+ /* Mach-O doesn't support omitting the frame pointer for now. */
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 1;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = 0;
+ }
+ else
+ {
+ if (flag_omit_frame_pointer == 2)
+ flag_omit_frame_pointer = 0;
+ if (flag_asynchronous_unwind_tables == 2)
+ flag_asynchronous_unwind_tables = 0;
+ if (flag_pcc_struct_return == 2)
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
+ }
+
+ /* Need to check -mtune=generic first. */
+ if (ix86_tune_string)
+ {
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "i686")
+ /* As special support for cross compilers we read -mtune=native
+ as -mtune=generic. With native compilers we won't see
+ -mtune=native, as it was changed by the driver. */
+ || !strcmp (ix86_tune_string, "native"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ /* If this call is for setting the option attribute, allow the
+ generic32/generic64 that was previously set. */
+ else if (!main_args_p
+ && (!strcmp (ix86_tune_string, "generic32")
+ || !strcmp (ix86_tune_string, "generic64")))
+ ;
+ else if (!strncmp (ix86_tune_string, "generic", 7))
+ error ("bad value (%s) for %stune=%s %s",
+ ix86_tune_string, prefix, suffix, sw);
+ }
+ else
+ {
+ if (ix86_arch_string)
+ ix86_tune_string = ix86_arch_string;
+ if (!ix86_tune_string)
+ {
+ ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
+ ix86_tune_defaulted = 1;
+ }
+
+ /* ix86_tune_string is set to ix86_arch_string or defaulted. We
+ need to use a sensible tune option. */
+ if (!strcmp (ix86_tune_string, "generic")
+ || !strcmp (ix86_tune_string, "x86-64")
+ || !strcmp (ix86_tune_string, "i686"))
+ {
+ if (TARGET_64BIT)
+ ix86_tune_string = "generic64";
+ else
+ ix86_tune_string = "generic32";
+ }
+ }
+ if (ix86_stringop_string)
+ {
+ if (!strcmp (ix86_stringop_string, "rep_byte"))
+ stringop_alg = rep_prefix_1_byte;
+ else if (!strcmp (ix86_stringop_string, "libcall"))
+ stringop_alg = libcall;
+ else if (!strcmp (ix86_stringop_string, "rep_4byte"))
+ stringop_alg = rep_prefix_4_byte;
+ else if (!strcmp (ix86_stringop_string, "rep_8byte")
+ && TARGET_64BIT)
+ /* rep; movq isn't available in 32-bit code. */
+ stringop_alg = rep_prefix_8_byte;
+ else if (!strcmp (ix86_stringop_string, "byte_loop"))
+ stringop_alg = loop_1_byte;
+ else if (!strcmp (ix86_stringop_string, "loop"))
+ stringop_alg = loop;
+ else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
+ stringop_alg = unrolled_loop;
+ else
+ error ("bad value (%s) for %sstringop-strategy=%s %s",
+ ix86_stringop_string, prefix, suffix, sw);
+ }
+ if (!strcmp (ix86_tune_string, "x86-64"))
+ warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
+ "%stune=k8%s or %stune=generic%s instead as appropriate.",
+ prefix, suffix, prefix, suffix, prefix, suffix);
+
+ if (!ix86_arch_string)
+ ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
+ else
+ ix86_arch_specified = 1;
+
+ if (!strcmp (ix86_arch_string, "generic"))
+ error ("generic CPU can be used only for %stune=%s %s",
+ prefix, suffix, sw);
+ if (!strncmp (ix86_arch_string, "generic", 7))
+ error ("bad value (%s) for %sarch=%s %s",
+ ix86_arch_string, prefix, suffix, sw);
+
+ if (ix86_cmodel_string != 0)
+ {
+ if (!strcmp (ix86_cmodel_string, "small"))
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ else if (!strcmp (ix86_cmodel_string, "medium"))
+ ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
+ else if (!strcmp (ix86_cmodel_string, "large"))
+ ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
+ else if (flag_pic)
+ error ("code model %s does not support PIC mode", ix86_cmodel_string);
+ else if (!strcmp (ix86_cmodel_string, "32"))
+ ix86_cmodel = CM_32;
+ else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
+ ix86_cmodel = CM_KERNEL;
+ else
+ error ("bad value (%s) for %scmodel=%s %s",
+ ix86_cmodel_string, prefix, suffix, sw);
+ }
+ else
+ {
+ /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
+ use of rip-relative addressing. This eliminates fixups that
+ would otherwise be needed if this object is to be placed in a
+ DLL, and is essentially just as efficient as direct addressing. */
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
+ else if (TARGET_64BIT)
+ ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
+ else
+ ix86_cmodel = CM_32;
+ }
+ if (ix86_asm_string != 0)
+ {
+ if (! TARGET_MACHO
+ && !strcmp (ix86_asm_string, "intel"))
+ ix86_asm_dialect = ASM_INTEL;
+ else if (!strcmp (ix86_asm_string, "att"))
+ ix86_asm_dialect = ASM_ATT;
+ else
+ error ("bad value (%s) for %sasm=%s %s",
+ ix86_asm_string, prefix, suffix, sw);
+ }
+ if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
+ error ("code model %qs not supported in the %s bit mode",
+ ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
+ if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
+ sorry ("%i-bit mode not compiled in",
+ (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
+ {
+ ix86_schedule = processor_alias_table[i].schedule;
+ ix86_arch = processor_alias_table[i].processor;
+ /* Default cpu tuning to the architecture. */
+ ix86_tune = ix86_arch;
+
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+
+ if (processor_alias_table[i].flags & PTA_MMX
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX;
+ if (processor_alias_table[i].flags & PTA_3DNOW
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
+ if (processor_alias_table[i].flags & PTA_3DNOW_A
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
+ if (processor_alias_table[i].flags & PTA_SSE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE;
+ if (processor_alias_table[i].flags & PTA_SSE2
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
+ if (processor_alias_table[i].flags & PTA_SSE3
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
+ if (processor_alias_table[i].flags & PTA_SSSE3
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
+ if (processor_alias_table[i].flags & PTA_SSE4_1
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
+ if (processor_alias_table[i].flags & PTA_SSE4_2
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
+ if (processor_alias_table[i].flags & PTA_AVX
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX;
+ if (processor_alias_table[i].flags & PTA_FMA
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA;
+ if (processor_alias_table[i].flags & PTA_SSE4A
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
+ if (processor_alias_table[i].flags & PTA_SSE5
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
+ if (processor_alias_table[i].flags & PTA_ABM
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
+ ix86_isa_flags |= OPTION_MASK_ISA_ABM;
+ if (processor_alias_table[i].flags & PTA_CX16
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
+ ix86_isa_flags |= OPTION_MASK_ISA_CX16;
+ if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
+ ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
+ if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
+ ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
+ if (processor_alias_table[i].flags & PTA_AES
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
+ ix86_isa_flags |= OPTION_MASK_ISA_AES;
+ if (processor_alias_table[i].flags & PTA_PCLMUL
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
+ ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
+ if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
+ x86_prefetch_sse = true;
+
+ break;
+ }
+
+ if (i == pta_size)
+ error ("bad value (%s) for %sarch=%s %s",
+ ix86_arch_string, prefix, suffix, sw);
+
+ ix86_arch_mask = 1u << ix86_arch;
+ for (i = 0; i < X86_ARCH_LAST; ++i)
+ ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
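+
+ /* E.g. for -march=i686 (PROCESSOR_PENTIUMPRO), the CMOVE entry in
+ initial_ix86_arch_features is ~(m_386 | m_486 | m_PENT | m_K6), which
+ has the PentiumPro bit set, so ix86_arch_features[X86_ARCH_CMOVE]
+ becomes 1. */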
+
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
+ {
+ ix86_schedule = processor_alias_table[i].schedule;
+ ix86_tune = processor_alias_table[i].processor;
+ if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
+ {
+ if (ix86_tune_defaulted)
+ {
+ ix86_tune_string = "x86-64";
+ for (i = 0; i < pta_size; i++)
+ if (! strcmp (ix86_tune_string,
+ processor_alias_table[i].name))
+ break;
+ ix86_schedule = processor_alias_table[i].schedule;
+ ix86_tune = processor_alias_table[i].processor;
+ }
+ else
+ error ("CPU you selected does not support x86-64 "
+ "instruction set");
+ }
+ /* Intel CPUs have always interpreted SSE prefetch instructions as
+ NOPs, so we can enable SSE prefetch instructions even when -mtune
+ (rather than -march) points us to a processor that has them.
+ However, the VIA C3 gives a SIGILL, so we only do that for i686 and
+ higher processors. */
+ if (TARGET_CMOVE
+ && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
+ x86_prefetch_sse = true;
+ break;
+ }
+ if (i == pta_size)
+ error ("bad value (%s) for %stune=%s %s",
+ ix86_tune_string, prefix, suffix, sw);
+
+ ix86_tune_mask = 1u << ix86_tune;
+ for (i = 0; i < X86_TUNE_LAST; ++i)
+ ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
+
+ if (optimize_size)
+ ix86_cost = &ix86_size_cost;
+ else
+ ix86_cost = processor_target_table[ix86_tune].cost;
+
+ /* Arrange to set up i386_stack_locals for all functions. */
+ init_machine_status = ix86_init_machine_status;
+
+ /* Validate -mregparm= value. */
+ if (ix86_regparm_string)
+ {
+ if (TARGET_64BIT)
+ warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
+ i = atoi (ix86_regparm_string);
+ if (i < 0 || i > REGPARM_MAX)
+ error ("%sregparm=%d%s is not between 0 and %d",
+ prefix, i, suffix, REGPARM_MAX);
+ else
+ ix86_regparm = i;
+ }
+ if (TARGET_64BIT)
+ ix86_regparm = REGPARM_MAX;
+
+ /* If the user has provided any of the -malign-* options,
+ warn and use that value only if -falign-* is not set.
+ Remove this code in GCC 3.2 or later. */
+ if (ix86_align_loops_string)
+ {
+ warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
+ prefix, suffix, suffix);
+ if (align_loops == 0)
+ {
+ i = atoi (ix86_align_loops_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("%salign-loops=%d%s is not between 0 and %d",
+ prefix, i, suffix, MAX_CODE_ALIGN);
+ else
+ align_loops = 1 << i;
+ }
+ }
+
+ if (ix86_align_jumps_string)
+ {
+ warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
+ prefix, suffix, suffix);
+ if (align_jumps == 0)
+ {
+ i = atoi (ix86_align_jumps_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("%salign-loops=%d%s is not between 0 and %d",
+ prefix, i, suffix, MAX_CODE_ALIGN);
+ else
+ align_jumps = 1 << i;
+ }
+ }
+
+ if (ix86_align_funcs_string)
+ {
+ warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
+ prefix, suffix, suffix);
+ if (align_functions == 0)
+ {
+ i = atoi (ix86_align_funcs_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("%salign-loops=%d%s is not between 0 and %d",
+ prefix, i, suffix, MAX_CODE_ALIGN);
+ else
+ align_functions = 1 << i;
+ }
+ }
+
+ /* Default align_* from the processor table. */
+ if (align_loops == 0)
+ {
+ align_loops = processor_target_table[ix86_tune].align_loop;
+ align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+ }
+ if (align_jumps == 0)
+ {
+ align_jumps = processor_target_table[ix86_tune].align_jump;
+ align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+ }
+ if (align_functions == 0)
+ {
+ align_functions = processor_target_table[ix86_tune].align_func;
+ }
+
+ /* Validate -mbranch-cost= value, or provide default. */
+ ix86_branch_cost = ix86_cost->branch_cost;
+ if (ix86_branch_cost_string)
+ {
+ i = atoi (ix86_branch_cost_string);
+ if (i < 0 || i > 5)
+ error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
+ else
+ ix86_branch_cost = i;
+ }
+ if (ix86_section_threshold_string)
+ {
+ i = atoi (ix86_section_threshold_string);
+ if (i < 0)
+ error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
+ else
+ ix86_section_threshold = i;
+ }
+
+ if (ix86_tls_dialect_string)
+ {
+ if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU;
+ else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
+ ix86_tls_dialect = TLS_DIALECT_GNU2;
+ else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
+ ix86_tls_dialect = TLS_DIALECT_SUN;
+ else
+ error ("bad value (%s) for %stls-dialect=%s %s",
+ ix86_tls_dialect_string, prefix, suffix, sw);
+ }
+
+ if (ix87_precision_string)
+ {
+ i = atoi (ix87_precision_string);
+ if (i != 32 && i != 64 && i != 80)
+ error ("pc%d is not valid precision setting (32, 64 or 80)", i);
+ }
+
+ if (TARGET_64BIT)
+ {
+ target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
+
+ /* Enable by default the SSE and MMX builtins. Do allow the user to
+ explicitly disable any of these. In particular, disabling SSE and
+ MMX for kernel code is extremely useful. */
+ if (!ix86_arch_specified)
+ ix86_isa_flags
+ |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
+ | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
+
+ if (TARGET_RTD)
+ warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
+ }
+ else
+ {
+ target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
+
+ if (!ix86_arch_specified)
+ ix86_isa_flags
+ |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
+
+      /* The i386 ABI does not specify a red zone.  It still makes sense to
+	 use one when the programmer takes care to keep the stack from being
+	 destroyed.  */
+ if (!(target_flags_explicit & MASK_NO_RED_ZONE))
+ target_flags |= MASK_NO_RED_ZONE;
+ }
+
+ /* Keep nonleaf frame pointers. */
+ if (flag_omit_frame_pointer)
+ target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
+ else if (TARGET_OMIT_LEAF_FRAME_POINTER)
+ flag_omit_frame_pointer = 1;
+
+ /* If we're doing fast math, we don't care about comparison order
+ wrt NaNs. This lets us use a shorter comparison sequence. */
+ if (flag_finite_math_only)
+ target_flags &= ~MASK_IEEE_FP;
+
+ /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
+ since the insns won't need emulation. */
+ if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
+ target_flags &= ~MASK_NO_FANCY_MATH_387;
+
+ /* Likewise, if the target doesn't have a 387, or we've specified
+ software floating point, don't use 387 inline intrinsics. */
+ if (!TARGET_80387)
+ target_flags |= MASK_NO_FANCY_MATH_387;
+
+ /* Turn on MMX builtins for -msse. */
+ if (TARGET_SSE)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
+ x86_prefetch_sse = true;
+ }
+
+ /* Turn on popcnt instruction for -msse4.2 or -mabm. */
+ if (TARGET_SSE4_2 || TARGET_ABM)
+ ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
+
+ /* Validate -mpreferred-stack-boundary= value or default it to
+ PREFERRED_STACK_BOUNDARY_DEFAULT. */
+ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
+ if (ix86_preferred_stack_boundary_string)
+ {
+ i = atoi (ix86_preferred_stack_boundary_string);
+ if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
+ error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
+ prefix, i, suffix, TARGET_64BIT ? 4 : 2);
+ else
+ ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ }
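+
+  /* A worked example (illustrative): with -mpreferred-stack-boundary=4,
+     the boundary becomes (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits,
+     i.e. a 16-byte aligned stack, as needed for SSE register spills.  */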
+
+ /* Set the default value for -mstackrealign. */
+ if (ix86_force_align_arg_pointer == -1)
+ ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
+
+ /* Validate -mincoming-stack-boundary= value or default it to
+ MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
+ if (ix86_force_align_arg_pointer)
+ ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
+ else
+ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
+ ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
+ if (ix86_incoming_stack_boundary_string)
+ {
+ i = atoi (ix86_incoming_stack_boundary_string);
+ if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
+ error ("-mincoming-stack-boundary=%d is not between %d and 12",
+ i, TARGET_64BIT ? 4 : 2);
+ else
+ {
+ ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ ix86_incoming_stack_boundary
+ = ix86_user_incoming_stack_boundary;
+ }
+ }
+
+ /* Accept -msseregparm only if at least SSE support is enabled. */
+ if (TARGET_SSEREGPARM
+ && ! TARGET_SSE)
+ error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
+
+ ix86_fpmath = TARGET_FPMATH_DEFAULT;
+ if (ix86_fpmath_string != 0)
+ {
+ if (! strcmp (ix86_fpmath_string, "387"))
+ ix86_fpmath = FPMATH_387;
+ else if (! strcmp (ix86_fpmath_string, "sse"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else if (! strcmp (ix86_fpmath_string, "387,sse")
+ || ! strcmp (ix86_fpmath_string, "387+sse")
+ || ! strcmp (ix86_fpmath_string, "sse,387")
+ || ! strcmp (ix86_fpmath_string, "sse+387")
+ || ! strcmp (ix86_fpmath_string, "both"))
+ {
+ if (!TARGET_SSE)
+ {
+ warning (0, "SSE instruction set disabled, using 387 arithmetics");
+ ix86_fpmath = FPMATH_387;
+ }
+ else if (!TARGET_80387)
+ {
+ warning (0, "387 instruction set disabled, using SSE arithmetics");
+ ix86_fpmath = FPMATH_SSE;
+ }
+ else
+ ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
+ }
+ else
+ error ("bad value (%s) for %sfpmath=%s %s",
+ ix86_fpmath_string, prefix, suffix, sw);
+ }
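+
+  /* E.g. compiling with "-msse2 -mfpmath=sse" performs scalar float and
+     double arithmetic in SSE registers, while "-mfpmath=sse,387" (or
+     "both") lets the register allocator use both units when available.  */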
+
+ /* If the i387 is disabled, then do not return values in it. */
+ if (!TARGET_80387)
+ target_flags &= ~MASK_FLOAT_RETURNS;
+
+ /* Use external vectorized library in vectorizing intrinsics. */
+ if (ix86_veclibabi_string)
+ {
+ if (strcmp (ix86_veclibabi_string, "svml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_svml;
+ else if (strcmp (ix86_veclibabi_string, "acml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_acml;
+ else
+ error ("unknown vectorization library ABI type (%s) for "
+ "%sveclibabi=%s %s", ix86_veclibabi_string,
+ prefix, suffix, sw);
+ }
+
+ if ((x86_accumulate_outgoing_args & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ && !optimize_size)
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+
+ /* ??? Unwind info is not correct around the CFG unless either a frame
+ pointer is present or M_A_O_A is set. Fixing this requires rewriting
+ unwind info generation to be aware of the CFG and propagating states
+ around edges. */
+ if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+ || flag_exceptions || flag_non_call_exceptions)
+ && flag_omit_frame_pointer
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "unwind tables currently require either a frame pointer "
+ "or %saccumulate-outgoing-args%s for correctness",
+ prefix, suffix);
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
+ /* If stack probes are required, the space used for large function
+ arguments on the stack must also be probed, so enable
+ -maccumulate-outgoing-args so this happens in the prologue. */
+ if (TARGET_STACK_PROBE
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "stack probing requires %saccumulate-outgoing-args%s "
+ "for correctness", prefix, suffix);
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
+  /* For sane SSE instruction set generation we need the fcomi instruction.
+     It is safe to enable all CMOV instructions.  */
+ if (TARGET_SSE)
+ TARGET_CMOVE = 1;
+
+ /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
+ {
+ char *p;
+ ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
+ p = strchr (internal_label_prefix, 'X');
+ internal_label_prefix_len = p - internal_label_prefix;
+ *p = '\0';
+ }
+
+  /* When the scheduling description is not available, disable the scheduler
+     pass so it won't slow down compilation and make x87 code slower.  */
+ if (!TARGET_SCHEDULE)
+ flag_schedule_insns_after_reload = flag_schedule_insns = 0;
+
+ if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
+ set_param_value ("simultaneous-prefetches",
+ ix86_cost->simultaneous_prefetches);
+ if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
+ set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
+ if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
+ set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
+ if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
+ set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
+
+ /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
+ can be optimized to ap = __builtin_next_arg (0). */
+ if (!TARGET_64BIT)
+ targetm.expand_builtin_va_start = NULL;
+
+ if (TARGET_64BIT)
+ {
+ ix86_gen_leave = gen_leave_rex64;
+ ix86_gen_pop1 = gen_popdi1;
+ ix86_gen_add3 = gen_adddi3;
+ ix86_gen_sub3 = gen_subdi3;
+ ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
+ ix86_gen_one_cmpl2 = gen_one_cmpldi2;
+ ix86_gen_monitor = gen_sse3_monitor64;
+ ix86_gen_andsp = gen_anddi3;
+ }
+ else
+ {
+ ix86_gen_leave = gen_leave;
+ ix86_gen_pop1 = gen_popsi1;
+ ix86_gen_add3 = gen_addsi3;
+ ix86_gen_sub3 = gen_subsi3;
+ ix86_gen_sub3_carry = gen_subsi3_carry;
+ ix86_gen_one_cmpl2 = gen_one_cmplsi2;
+ ix86_gen_monitor = gen_sse3_monitor;
+ ix86_gen_andsp = gen_andsi3;
+ }
+
+#ifdef USE_IX86_CLD
+ /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
+ if (!TARGET_64BIT)
+ target_flags |= MASK_CLD & ~target_flags_explicit;
+#endif
+
+  /* Save the initial options in case the user uses function-specific
+     options.  */
+ if (main_args_p)
+ target_option_default_node = target_option_current_node
+ = build_target_option_node ();
+
+ /* Change -fstack-check to mean -mstack-arg-probe on x86. */
+ if (flag_stack_check)
+ {
+ flag_stack_check = 0;
+ target_flags |= MASK_STACK_PROBE;
+ }
+}
+
+/* Save the current options */
+
+static void
+ix86_function_specific_save (struct cl_target_option *ptr)
+{
+ gcc_assert (IN_RANGE (ix86_arch, 0, 255));
+ gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
+ gcc_assert (IN_RANGE (ix86_tune, 0, 255));
+ gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
+ gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
+
+ ptr->arch = ix86_arch;
+ ptr->schedule = ix86_schedule;
+ ptr->tune = ix86_tune;
+ ptr->fpmath = ix86_fpmath;
+ ptr->branch_cost = ix86_branch_cost;
+ ptr->tune_defaulted = ix86_tune_defaulted;
+ ptr->arch_specified = ix86_arch_specified;
+ ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
+ ptr->target_flags_explicit = target_flags_explicit;
+}
+
+/* Restore the current options */
+
+static void
+ix86_function_specific_restore (struct cl_target_option *ptr)
+{
+ enum processor_type old_tune = ix86_tune;
+ enum processor_type old_arch = ix86_arch;
+ unsigned int ix86_arch_mask, ix86_tune_mask;
+ int i;
+
+ ix86_arch = ptr->arch;
+ ix86_schedule = ptr->schedule;
+ ix86_tune = ptr->tune;
+ ix86_fpmath = ptr->fpmath;
+ ix86_branch_cost = ptr->branch_cost;
+ ix86_tune_defaulted = ptr->tune_defaulted;
+ ix86_arch_specified = ptr->arch_specified;
+ ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
+ target_flags_explicit = ptr->target_flags_explicit;
+
+ /* Recreate the arch feature tests if the arch changed */
+ if (old_arch != ix86_arch)
+ {
+ ix86_arch_mask = 1u << ix86_arch;
+ for (i = 0; i < X86_ARCH_LAST; ++i)
+ ix86_arch_features[i]
+ = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
+ }
+
+ /* Recreate the tune optimization tests */
+ if (old_tune != ix86_tune)
+ {
+ ix86_tune_mask = 1u << ix86_tune;
+ for (i = 0; i < X86_TUNE_LAST; ++i)
+ ix86_tune_features[i]
+ = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
+ }
+}
+
+/* Print the current options */
+
+static void
+ix86_function_specific_print (FILE *file, int indent,
+ struct cl_target_option *ptr)
+{
+ char *target_string
+ = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
+ NULL, NULL, NULL, false);
+
+ fprintf (file, "%*sarch = %d (%s)\n",
+ indent, "",
+ ptr->arch,
+ ((ptr->arch < TARGET_CPU_DEFAULT_max)
+ ? cpu_names[ptr->arch]
+ : "<unknown>"));
+
+ fprintf (file, "%*stune = %d (%s)\n",
+ indent, "",
+ ptr->tune,
+ ((ptr->tune < TARGET_CPU_DEFAULT_max)
+ ? cpu_names[ptr->tune]
+ : "<unknown>"));
+
+ fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
+ (ptr->fpmath & FPMATH_387) ? ", 387" : "",
+ (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
+ fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
+
+ if (target_string)
+ {
+ fprintf (file, "%*s%s\n", indent, "", target_string);
+ free (target_string);
+ }
+}
+
+
+/* Inner function to process the attribute((target(...))): take an argument
+   and set the current options from that argument.  If we have a list,
+   recursively go over the list.  */
+
+static bool
+ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
+{
+ char *next_optstr;
+ bool ret = true;
+
+#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
+#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
+#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
+#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
+
+ enum ix86_opt_type
+ {
+ ix86_opt_unknown,
+ ix86_opt_yes,
+ ix86_opt_no,
+ ix86_opt_str,
+ ix86_opt_isa
+ };
+
+ static const struct
+ {
+ const char *string;
+ size_t len;
+ enum ix86_opt_type type;
+ int opt;
+ int mask;
+ } attrs[] = {
+ /* isa options */
+ IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
+ IX86_ATTR_ISA ("abm", OPT_mabm),
+ IX86_ATTR_ISA ("aes", OPT_maes),
+ IX86_ATTR_ISA ("avx", OPT_mavx),
+ IX86_ATTR_ISA ("mmx", OPT_mmmx),
+ IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
+ IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
+ IX86_ATTR_ISA ("sse", OPT_msse),
+ IX86_ATTR_ISA ("sse2", OPT_msse2),
+ IX86_ATTR_ISA ("sse3", OPT_msse3),
+ IX86_ATTR_ISA ("sse4", OPT_msse4),
+ IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
+ IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
+ IX86_ATTR_ISA ("sse4a", OPT_msse4a),
+ IX86_ATTR_ISA ("sse5", OPT_msse5),
+ IX86_ATTR_ISA ("ssse3", OPT_mssse3),
+
+ /* string options */
+ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
+ IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
+ IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
+
+ /* flag options */
+ IX86_ATTR_YES ("cld",
+ OPT_mcld,
+ MASK_CLD),
+
+ IX86_ATTR_NO ("fancy-math-387",
+ OPT_mfancy_math_387,
+ MASK_NO_FANCY_MATH_387),
+
+ IX86_ATTR_NO ("fused-madd",
+ OPT_mfused_madd,
+ MASK_NO_FUSED_MADD),
+
+ IX86_ATTR_YES ("ieee-fp",
+ OPT_mieee_fp,
+ MASK_IEEE_FP),
+
+ IX86_ATTR_YES ("inline-compares",
+ OPT_minline_compares,
+ MASK_INLINE_COMPARES),
+
+ IX86_ATTR_YES ("inline-all-stringops",
+ OPT_minline_all_stringops,
+ MASK_INLINE_ALL_STRINGOPS),
+
+ IX86_ATTR_YES ("inline-stringops-dynamically",
+ OPT_minline_stringops_dynamically,
+ MASK_INLINE_STRINGOPS_DYNAMICALLY),
+
+ IX86_ATTR_NO ("align-stringops",
+ OPT_mno_align_stringops,
+ MASK_NO_ALIGN_STRINGOPS),
+
+ IX86_ATTR_YES ("recip",
+ OPT_mrecip,
+ MASK_RECIP),
+
+ };
+
+ /* If this is a list, recurse to get the options. */
+ if (TREE_CODE (args) == TREE_LIST)
+ {
+ bool ret = true;
+
+ for (; args; args = TREE_CHAIN (args))
+ if (TREE_VALUE (args)
+ && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
+ ret = false;
+
+ return ret;
+ }
+
+ else if (TREE_CODE (args) != STRING_CST)
+ gcc_unreachable ();
+
+ /* Handle multiple arguments separated by commas. */
+ next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
+
+ while (next_optstr && *next_optstr != '\0')
+ {
+ char *p = next_optstr;
+ char *orig_p = p;
+ char *comma = strchr (next_optstr, ',');
+ const char *opt_string;
+ size_t len, opt_len;
+ int opt;
+ bool opt_set_p;
+ char ch;
+ unsigned i;
+ enum ix86_opt_type type = ix86_opt_unknown;
+ int mask = 0;
+
+ if (comma)
+ {
+ *comma = '\0';
+ len = comma - next_optstr;
+ next_optstr = comma + 1;
+ }
+ else
+ {
+ len = strlen (p);
+ next_optstr = NULL;
+ }
+
+ /* Recognize no-xxx. */
+ if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
+ {
+ opt_set_p = false;
+ p += 3;
+ len -= 3;
+ }
+ else
+ opt_set_p = true;
+
+ /* Find the option. */
+ ch = *p;
+ opt = N_OPTS;
+ for (i = 0; i < ARRAY_SIZE (attrs); i++)
+ {
+ type = attrs[i].type;
+ opt_len = attrs[i].len;
+ if (ch == attrs[i].string[0]
+ && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
+ && memcmp (p, attrs[i].string, opt_len) == 0)
+ {
+ opt = attrs[i].opt;
+ mask = attrs[i].mask;
+ opt_string = attrs[i].string;
+ break;
+ }
+ }
+
+ /* Process the option. */
+ if (opt == N_OPTS)
+ {
+ error ("attribute(target(\"%s\")) is unknown", orig_p);
+ ret = false;
+ }
+
+ else if (type == ix86_opt_isa)
+ ix86_handle_option (opt, p, opt_set_p);
+
+ else if (type == ix86_opt_yes || type == ix86_opt_no)
+ {
+ if (type == ix86_opt_no)
+ opt_set_p = !opt_set_p;
+
+ if (opt_set_p)
+ target_flags |= mask;
+ else
+ target_flags &= ~mask;
+ }
+
+ else if (type == ix86_opt_str)
+ {
+ if (p_strings[opt])
+ {
+ error ("option(\"%s\") was already specified", opt_string);
+ ret = false;
+ }
+ else
+ p_strings[opt] = xstrdup (p + opt_len);
+ }
+
+ else
+ gcc_unreachable ();
+ }
+
+ return ret;
+}
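+
+/* For illustration, a declaration such as
+
+     void __attribute__((target ("sse4.2,no-fused-madd,arch=core2")))
+     f (void);
+
+   reaches the parser above as the string "sse4.2,no-fused-madd,arch=core2":
+   "sse4.2" is handled as an ISA option, the "no-" prefix on "fused-madd"
+   makes the IX86_ATTR_NO entry set MASK_NO_FUSED_MADD, and "arch=core2"
+   is saved as a string option for override_options to process later.  */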
+
+/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
+
+tree
+ix86_valid_target_attribute_tree (tree args)
+{
+ const char *orig_arch_string = ix86_arch_string;
+ const char *orig_tune_string = ix86_tune_string;
+ const char *orig_fpmath_string = ix86_fpmath_string;
+ int orig_tune_defaulted = ix86_tune_defaulted;
+ int orig_arch_specified = ix86_arch_specified;
+ char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
+ tree t = NULL_TREE;
+ int i;
+ struct cl_target_option *def
+ = TREE_TARGET_OPTION (target_option_default_node);
+
+ /* Process each of the options on the chain. */
+ if (! ix86_valid_target_attribute_inner_p (args, option_strings))
+ return NULL_TREE;
+
+  /* If the changed options are different from the default, rerun
+     override_options, and then save the options away.  The string options
+     are attribute options, and will be undone when we copy the save
+     structure.  */
+ if (ix86_isa_flags != def->ix86_isa_flags
+ || target_flags != def->target_flags
+ || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
+ || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
+ || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
+ {
+ /* If we are using the default tune= or arch=, undo the string assigned,
+ and use the default. */
+ if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
+ ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
+ else if (!orig_arch_specified)
+ ix86_arch_string = NULL;
+
+ if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
+ ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
+ else if (orig_tune_defaulted)
+ ix86_tune_string = NULL;
+
+      /* If fpmath= is not set, and we now have SSE on 32-bit, use it.  */
+ if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
+ ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
+ else if (!TARGET_64BIT && TARGET_SSE)
+ ix86_fpmath_string = "sse,387";
+
+ /* Do any overrides, such as arch=xxx, or tune=xxx support. */
+ override_options (false);
+
+ /* Add any builtin functions with the new isa if any. */
+ ix86_add_new_builtins (ix86_isa_flags);
+
+ /* Save the current options unless we are validating options for
+ #pragma. */
+ t = build_target_option_node ();
+
+ ix86_arch_string = orig_arch_string;
+ ix86_tune_string = orig_tune_string;
+ ix86_fpmath_string = orig_fpmath_string;
+
+ /* Free up memory allocated to hold the strings */
+ for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
+ if (option_strings[i])
+ free (option_strings[i]);
+ }
+
+ return t;
+}
+
+/* Hook to validate attribute((target("string"))). */
+
+static bool
+ix86_valid_target_attribute_p (tree fndecl,
+ tree ARG_UNUSED (name),
+ tree args,
+ int ARG_UNUSED (flags))
+{
+ struct cl_target_option cur_target;
+ bool ret = true;
+ tree old_optimize = build_optimization_node ();
+ tree new_target, new_optimize;
+ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+ /* If the function changed the optimization levels as well as setting target
+ options, start with the optimizations specified. */
+ if (func_optimize && func_optimize != old_optimize)
+ cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
+
+ /* The target attributes may also change some optimization flags, so update
+ the optimization options if necessary. */
+ cl_target_option_save (&cur_target);
+ new_target = ix86_valid_target_attribute_tree (args);
+ new_optimize = build_optimization_node ();
+
+ if (!new_target)
+ ret = false;
+
+ else if (fndecl)
+ {
+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+
+ if (old_optimize != new_optimize)
+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
+ }
+
+ cl_target_option_restore (&cur_target);
+
+ if (old_optimize != new_optimize)
+ cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
+
+ return ret;
+}
+
+
+/* Hook to determine if one function can safely inline another. */
+
+static bool
+ix86_can_inline_p (tree caller, tree callee)
+{
+ bool ret = false;
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+
+ /* If callee has no option attributes, then it is ok to inline. */
+ if (!callee_tree)
+ ret = true;
+
+ /* If caller has no option attributes, but callee does then it is not ok to
+ inline. */
+ else if (!caller_tree)
+ ret = false;
+
+ else
+ {
+ struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
+ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
+
+      /* The callee's ISA options should be a subset of the caller's, i.e.
+	 an SSE5 function can inline an SSE2 function, but an SSE2 function
+	 can't inline an SSE5 function.  */
+ if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
+ != callee_opts->ix86_isa_flags)
+ ret = false;
+
+ /* See if we have the same non-isa options. */
+ else if (caller_opts->target_flags != callee_opts->target_flags)
+ ret = false;
+
+ /* See if arch, tune, etc. are the same. */
+ else if (caller_opts->arch != callee_opts->arch)
+ ret = false;
+
+ else if (caller_opts->tune != callee_opts->tune)
+ ret = false;
+
+ else if (caller_opts->fpmath != callee_opts->fpmath)
+ ret = false;
+
+ else if (caller_opts->branch_cost != callee_opts->branch_cost)
+ ret = false;
+
+ else
+ ret = true;
+ }
+
+ return ret;
+}
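+
+/* An illustrative consequence of the subset rule above:
+
+     static void __attribute__((target ("sse2"))) callee (void) {}
+     void __attribute__((target ("sse4.2"))) caller (void) { callee (); }
+
+   here inlining is allowed, since enabling SSE4.2 also enables the lower
+   SSE levels and the callee's ISA flags are therefore a subset of the
+   caller's; with the two attributes swapped, inlining is rejected.  */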
+
+
+/* Remember the last target of ix86_set_current_function. */
+static GTY(()) tree ix86_previous_fndecl;
+
+/* Establish appropriate back-end context for processing the function
+ FNDECL. The argument might be NULL to indicate processing at top
+ level, outside of any function scope. */
+static void
+ix86_set_current_function (tree fndecl)
+{
+ /* Only change the context if the function changes. This hook is called
+ several times in the course of compiling a function, and we don't want to
+ slow things down too much or call target_reinit when it isn't safe. */
+ if (fndecl && fndecl != ix86_previous_fndecl)
+ {
+ tree old_tree = (ix86_previous_fndecl
+ ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
+ : NULL_TREE);
+
+ tree new_tree = (fndecl
+ ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
+ : NULL_TREE);
+
+ ix86_previous_fndecl = fndecl;
+ if (old_tree == new_tree)
+ ;
+
+ else if (new_tree)
+ {
+ cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
+ target_reinit ();
+ }
+
+ else if (old_tree)
+ {
+ struct cl_target_option *def
+ = TREE_TARGET_OPTION (target_option_current_node);
+
+ cl_target_option_restore (def);
+ target_reinit ();
+ }
+ }
+}
+
+
+/* Return true if this goes in large data/bss. */
+
+static bool
+ix86_in_large_data_p (tree exp)
+{
+ if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
+ return false;
+
+ /* Functions are never large data. */
+ if (TREE_CODE (exp) == FUNCTION_DECL)
+ return false;
+
+ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
+ {
+ const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
+ if (strcmp (section, ".ldata") == 0
+ || strcmp (section, ".lbss") == 0)
+ return true;
+ return false;
+ }
+ else
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
+
+ /* If this is an incomplete type with size 0, then we can't put it
+ in data because it might be too big when completed. */
+ if (!size || size > ix86_section_threshold)
+ return true;
+ }
+
+ return false;
+}
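+
+/* E.g. when compiling with -mcmodel=medium, a variable whose size exceeds
+   ix86_section_threshold (settable via -mlarge-data-threshold=) is placed
+   in .ldata/.lbss, while smaller variables stay in the normal .data/.bss
+   sections.  */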
+
+/* Switch to the appropriate section for output of DECL.
+ DECL is either a `VAR_DECL' node or a constant of some sort.
+ RELOC indicates whether forming the initial value of DECL requires
+ link-time relocations. */
+
+static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
+ ATTRIBUTE_UNUSED;
+
+static section *
+x86_64_elf_select_section (tree decl, int reloc,
+ unsigned HOST_WIDE_INT align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *sname = NULL;
+ unsigned int flags = SECTION_WRITE;
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ sname = ".ldata";
+ break;
+ case SECCAT_DATA_REL:
+ sname = ".ldata.rel";
+ break;
+ case SECCAT_DATA_REL_LOCAL:
+ sname = ".ldata.rel.local";
+ break;
+ case SECCAT_DATA_REL_RO:
+ sname = ".ldata.rel.ro";
+ break;
+ case SECCAT_DATA_REL_RO_LOCAL:
+ sname = ".ldata.rel.ro.local";
+ break;
+ case SECCAT_BSS:
+ sname = ".lbss";
+ flags |= SECTION_BSS;
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ sname = ".lrodata";
+ flags = 0;
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+	  /* We don't split these for the medium model.  Place them into
+	     default sections and hope for the best.  */
+ break;
+ case SECCAT_EMUTLS_VAR:
+ case SECCAT_EMUTLS_TMPL:
+ gcc_unreachable ();
+ }
+ if (sname)
+ {
+ /* We might get called with string constants, but get_named_section
+ doesn't like them as they are not DECLs. Also, we need to set
+ flags in that case. */
+ if (!DECL_P (decl))
+ return get_section (sname, flags, NULL);
+ return get_named_section (decl, sname, reloc);
+ }
+ }
+ return default_elf_select_section (decl, reloc, align);
+}
+
+/* Build up a unique section name, expressed as a
+ STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
+ RELOC indicates whether the initial value of EXP requires
+ link-time relocations. */
+
+static void ATTRIBUTE_UNUSED
+x86_64_elf_unique_section (tree decl, int reloc)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && ix86_in_large_data_p (decl))
+ {
+ const char *prefix = NULL;
+ /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
+ bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
+
+ switch (categorize_decl_for_section (decl, reloc))
+ {
+ case SECCAT_DATA:
+ case SECCAT_DATA_REL:
+ case SECCAT_DATA_REL_LOCAL:
+ case SECCAT_DATA_REL_RO:
+ case SECCAT_DATA_REL_RO_LOCAL:
+ prefix = one_only ? ".ld" : ".ldata";
+ break;
+ case SECCAT_BSS:
+ prefix = one_only ? ".lb" : ".lbss";
+ break;
+ case SECCAT_RODATA:
+ case SECCAT_RODATA_MERGE_STR:
+ case SECCAT_RODATA_MERGE_STR_INIT:
+ case SECCAT_RODATA_MERGE_CONST:
+ prefix = one_only ? ".lr" : ".lrodata";
+ break;
+ case SECCAT_SRODATA:
+ case SECCAT_SDATA:
+ case SECCAT_SBSS:
+ gcc_unreachable ();
+ case SECCAT_TEXT:
+ case SECCAT_TDATA:
+ case SECCAT_TBSS:
+	  /* We don't split these for the medium model.  Place them into
+	     default sections and hope for the best.  */
+ break;
+ case SECCAT_EMUTLS_VAR:
+ prefix = targetm.emutls.var_section;
+ break;
+ case SECCAT_EMUTLS_TMPL:
+ prefix = targetm.emutls.tmpl_section;
+ break;
+ }
+ if (prefix)
+ {
+ const char *name, *linkonce;
+ char *string;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = targetm.strip_name_encoding (name);
+
+ /* If we're using one_only, then there needs to be a .gnu.linkonce
+ prefix to the section name. */
+ linkonce = one_only ? ".gnu.linkonce" : "";
+
+ string = ACONCAT ((linkonce, prefix, ".", name, NULL));
+
+ DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
+ return;
+ }
+ }
+ default_unique_section (decl, reloc);
+}
+
+#ifdef COMMON_ASM_OP
+/* This says how to output assembler code to declare an
+   uninitialized external linkage data object.
+
+   For the medium model on x86-64 we need to use the .largecomm directive
+   for large objects.  */
+void
+x86_elf_aligned_common (FILE *file,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ fprintf (file, ".largecomm\t");
+ else
+ fprintf (file, "%s", COMMON_ASM_OP);
+ assemble_name (file, name);
+ fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
+ size, align / BITS_PER_UNIT);
+}
+#endif
+
+/* Utility function for targets to use in implementing
+ ASM_OUTPUT_ALIGNED_BSS. */
+
+void
+x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
+ const char *name, unsigned HOST_WIDE_INT size,
+ int align)
+{
+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
+ && size > (unsigned int)ix86_section_threshold)
+ switch_to_section (get_named_section (decl, ".lbss", 0));
+ else
+ switch_to_section (bss_section);
+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
+#ifdef ASM_DECLARE_OBJECT_NAME
+ last_assemble_variable_decl = decl;
+ ASM_DECLARE_OBJECT_NAME (file, name, decl);
+#else
+  /* The standard thing is to just output a label for the object.  */
+ ASM_OUTPUT_LABEL (file, name);
+#endif /* ASM_DECLARE_OBJECT_NAME */
+ ASM_OUTPUT_SKIP (file, size ? size : 1);
+}
+
+void
+optimization_options (int level, int size ATTRIBUTE_UNUSED)
+{
+ /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
+ make the problem with not enough registers even worse. */
+#ifdef INSN_SCHEDULING
+ if (level > 1)
+ flag_schedule_insns = 0;
+#endif
+
+ /* For -O2 and beyond, turn on -fzee for x86_64 target. */
+ if (level > 1 && TARGET_64BIT)
+ flag_zee = 1;
+
+ if (TARGET_MACHO)
+ /* The Darwin libraries never set errno, so we might as well
+ avoid calling them when that's the only reason we would. */
+ flag_errno_math = 0;
+
+  /* The default values of these switches depend on TARGET_64BIT, which is
+     not known at this moment.  Mark these values with 2 and let the user
+     override them.  In case there is no command line option specifying
+     them, we will set the defaults in override_options.  */
+ if (optimize >= 1)
+ flag_omit_frame_pointer = 2;
+ flag_pcc_struct_return = 2;
+ flag_asynchronous_unwind_tables = 2;
+ flag_vect_cost_model = 1;
+#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
+ SUBTARGET_OPTIMIZATION_OPTIONS;
+#endif
+}
+
+/* Decide whether we can make a sibling call to a function. DECL is the
+ declaration of the function being targeted by the call and EXP is the
+ CALL_EXPR representing the call. */
+
+static bool
+ix86_function_ok_for_sibcall (tree decl, tree exp)
+{
+ tree func;
+ rtx a, b;
+
+ /* If we are generating position-independent code, we cannot sibcall
+ optimize any indirect call, or a direct call to a global function,
+ as the PLT requires %ebx be live. */
+ if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
+ return false;
+
+ if (decl)
+ func = decl;
+ else
+ {
+ func = TREE_TYPE (CALL_EXPR_FN (exp));
+ if (POINTER_TYPE_P (func))
+ func = TREE_TYPE (func);
+ }
+
+  /* Check that the return value locations are the same.  For example,
+     if we are returning floats on the 80387 register stack, we cannot
+ make a sibcall from a function that doesn't return a float to a
+ function that does or, conversely, from a function that does return
+ a float to a function that doesn't; the necessary stack adjustment
+ would not be executed. This is also the place we notice
+ differences in the return value ABI. Note that it is ok for one
+ of the functions to have void return type as long as the return
+ value of the other is passed in a register. */
+ a = ix86_function_value (TREE_TYPE (exp), func, false);
+ b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
+ cfun->decl, false);
+ if (STACK_REG_P (a) || STACK_REG_P (b))
+ {
+ if (!rtx_equal_p (a, b))
+ return false;
+ }
+ else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
+ ;
+ else if (!rtx_equal_p (a, b))
+ return false;
+
+ /* If this call is indirect, we'll need to be able to use a call-clobbered
+ register for the address of the target function. Make sure that all
+ such registers are not used for passing parameters. */
+ if (!decl && !TARGET_64BIT)
+ {
+ tree type;
+
+ /* We're looking at the CALL_EXPR, we need the type of the function. */
+ type = CALL_EXPR_FN (exp); /* pointer expression */
+ type = TREE_TYPE (type); /* pointer type */
+ type = TREE_TYPE (type); /* function type */
+
+ if (ix86_function_regparm (type, NULL) >= 3)
+ {
+ /* ??? Need to count the actual number of registers to be used,
+ not the possible number of registers. Fix later. */
+ return false;
+ }
+ }
+
+ /* Dllimport'd functions are also called indirectly. */
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && !TARGET_64BIT
+ && decl && DECL_DLLIMPORT_P (decl)
+ && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
+ return false;
+
+ /* If we need to align the outgoing stack, then sibcalling would
+ unalign the stack, which may break the called function. */
+ if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
+ return false;
+
+ /* Otherwise okay. That also includes certain types of indirect calls. */
+ return true;
+}
+
+/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
+ calling convention attributes;
+ arguments as in struct attribute_spec.handler. */
+
+static tree
+ix86_handle_cconv_attribute (tree *node, tree name,
+ tree args,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qs attribute only applies to functions",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* Can combine regparm with all attributes but fastcall. */
+ if (is_attribute_p ("regparm", name))
+ {
+ tree cst;
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+
+ cst = TREE_VALUE (args);
+ if (TREE_CODE (cst) != INTEGER_CST)
+ {
+ warning (OPT_Wattributes,
+ "%qs attribute requires an integer constant argument",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+ else if (compare_tree_int (cst, REGPARM_MAX) > 0)
+ {
+ warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
+ IDENTIFIER_POINTER (name), REGPARM_MAX);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+ }
+
+ if (TARGET_64BIT)
+ {
+ /* Do not warn when emulating the MS ABI. */
+ if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
+ warning (OPT_Wattributes, "%qs attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+  /* fastcall can be combined only with sseregparm; cdecl, stdcall
+     and regparm all conflict with it.  */
+ if (is_attribute_p ("fastcall", name))
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and stdcall attributes are not compatible");
+ }
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+ }
+
+  /* Can combine stdcall with regparm and sseregparm; cdecl and
+     fastcall conflict with it.  */
+ else if (is_attribute_p ("stdcall", name))
+ {
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and fastcall attributes are not compatible");
+ }
+ }
+
+ /* Can combine cdecl with regparm and sseregparm. */
+ else if (is_attribute_p ("cdecl", name))
+ {
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and cdecl attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and cdecl attributes are not compatible");
+ }
+ }
+
+ /* Can combine sseregparm with all attributes. */
+
+ return NULL_TREE;
+}
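+
+/* Illustrative combinations under the checks above: the declarations
+
+     int __attribute__((fastcall)) f (int a, int b);
+     int __attribute__((stdcall, regparm (2))) g (int a, int b);
+
+   are both accepted, while
+
+     int __attribute__((fastcall, stdcall)) h (int a, int b);
+
+   is rejected as an incompatible attribute combination.  */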
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible, and 2 if they are nearly compatible (which causes a
+ warning to be generated). */
+
+static int
+ix86_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ /* Check for mismatch of non-default calling convention. */
+ const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
+
+ if (TREE_CODE (type1) != FUNCTION_TYPE
+ && TREE_CODE (type1) != METHOD_TYPE)
+ return 1;
+
+ /* Check for mismatched fastcall/regparm types. */
+ if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
+ || (ix86_function_regparm (type1, NULL)
+ != ix86_function_regparm (type2, NULL)))
+ return 0;
+
+ /* Check for mismatched sseregparm types. */
+ if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ /* Check for mismatched return types (cdecl vs stdcall). */
+ if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
+ return 0;
+
+ return 1;
+}
+
+/* Return the regparm value for a function with the indicated TYPE and DECL.
+ DECL may be NULL when calling function indirectly
+ or considering a libcall. */
+
+static int
+ix86_function_regparm (const_tree type, const_tree decl)
+{
+ tree attr;
+ int regparm;
+
+ static bool error_issued;
+
+ if (TARGET_64BIT)
+ return (ix86_function_type_abi (type) == SYSV_ABI
+ ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
+
+ regparm = ix86_regparm;
+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ regparm
+ = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+
+ if (decl && TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ /* We can't use regparm(3) for nested functions because
+ these pass static chain pointer in %ecx register. */
+ if (!error_issued && regparm == 3
+ && decl_function_context (decl)
+ && !DECL_NO_STATIC_CHAIN (decl))
+ {
+ error ("nested functions are limited to 2 register parameters");
+ error_issued = true;
+ return 0;
+ }
+ }
+
+ return regparm;
+ }
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ return 2;
+
+ /* Use register calling convention for local functions when possible. */
+ if (decl
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && optimize
+ && !profile_flag)
+ {
+ /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local)
+ {
+ int local_regparm, globals = 0, regno;
+ struct function *f;
+
+ /* Make sure no regparm register is taken by a
+ fixed register variable. */
+ for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
+ if (fixed_regs[local_regparm])
+ break;
+
+ /* We can't use regparm(3) for nested functions as these use
+ static chain pointer in third argument. */
+ if (local_regparm == 3
+ && decl_function_context (decl)
+ && !DECL_NO_STATIC_CHAIN (decl))
+ local_regparm = 2;
+
+	  /* If the function realigns its stack pointer, the prologue will
+	     clobber %ecx.  If we've already generated code for the callee,
+	     the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
+	     scanning the attributes for the self-realigning property.  */
+ f = DECL_STRUCT_FUNCTION (decl);
+	  /* The current internal arg pointer won't conflict with the
+	     parameter-passing registers, so there is no need to change the
+	     stack realignment or adjust the regparm number.
+
+	     Each fixed register usage increases register pressure, so
+	     fewer registers should be used for argument passing.  This
+	     functionality can be overridden by an explicit regparm
+	     value.  */
+ for (regno = 0; regno <= DI_REG; regno++)
+ if (fixed_regs[regno])
+ globals++;
+
+ local_regparm
+ = globals < local_regparm ? local_regparm - globals : 0;
+
+ if (local_regparm > regparm)
+ regparm = local_regparm;
+ }
+ }
+
+ return regparm;
+}
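+
+/* For example, with
+
+     int __attribute__((regparm (3))) add3 (int a, int b, int c);
+
+   this returns 3, and the three integer arguments are passed in %eax,
+   %edx and %ecx instead of on the stack.  */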
+
+/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
+ DFmode (2) arguments in SSE registers for a function with the
+ indicated TYPE and DECL. DECL may be NULL when calling function
+ indirectly or considering a libcall. Otherwise return 0. */
+
+static int
+ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ /* Use SSE registers to pass SFmode and DFmode arguments if requested
+ by the sseregparm attribute. */
+ if (TARGET_SSEREGPARM
+ || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
+ {
+ if (!TARGET_SSE)
+ {
+ if (warn)
+ {
+ if (decl)
+ error ("Calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("Calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ }
+ return 0;
+ }
+
+ return 2;
+ }
+
+ /* For local functions, pass up to SSE_REGPARM_MAX SFmode
+ (and DFmode for SSE2) arguments in SSE registers. */
+ if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
+ {
+ /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
+ if (i && i->local)
+ return TARGET_SSE2 ? 2 : 1;
+ }
+
+ return 0;
+}
+
+/* Return true if EAX is live at the start of the function. Used by
+ ix86_expand_prologue to determine if we need special help before
+ calling allocate_stack_worker. */
+
+static bool
+ix86_eax_live_at_start_p (void)
+{
+ /* Cheat. Don't bother working forward from ix86_function_regparm
+ to the function type to whether an actual argument is located in
+ eax. Instead just look at cfg info, which is still close enough
+ to correct at this point. This gives false positives for broken
+ functions that might use uninitialized data that happens to be
+ allocated in eax, but who cares? */
+ return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
+}
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the 80386, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per module basis. */
+
+int
+ix86_return_pops_args (tree fundecl, tree funtype, int size)
+{
+ int rtd;
+
+ /* None of the 64-bit ABIs pop arguments. */
+ if (TARGET_64BIT)
+ return 0;
+
+ rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
+
+ /* Cdecl functions override -mrtd, and never pop the stack. */
+ if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
+ {
+ /* Stdcall and fastcall functions will pop the stack if not
+ variable args. */
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
+ rtd = 1;
+
+ if (rtd && ! stdarg_p (funtype))
+ return size;
+ }
+
+ /* Lose any fake structure return argument if it is passed on the stack. */
+ if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
+ && !KEEP_AGGREGATE_RETURN_POINTER)
+ {
+ int nregs = ix86_function_regparm (funtype, fundecl);
+ if (nregs == 0)
+ return GET_MODE_SIZE (Pmode);
+ }
+
+ return 0;
+}
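+
+/* E.g. for a non-variadic function declared
+
+     void __attribute__((stdcall)) f (int a, int b);
+
+   this returns 8, and the callee pops its arguments with "ret $8"; for
+   the default cdecl convention it returns 0 and the caller pops.  */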
+
+/* Argument support functions. */
+
+/* Return true when a register may be used to pass function parameters.  */
+bool
+ix86_function_arg_regno_p (int regno)
+{
+ int i;
+ const int *parm_regs;
+
+ if (!TARGET_64BIT)
+ {
+ if (TARGET_MACHO)
+ return (regno < REGPARM_MAX
+ || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
+ else
+ return (regno < REGPARM_MAX
+ || (TARGET_MMX && MMX_REGNO_P (regno)
+ && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
+ || (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
+ }
+
+ if (TARGET_MACHO)
+ {
+ if (SSE_REGNO_P (regno) && TARGET_SSE)
+ return true;
+ }
+ else
+ {
+ if (TARGET_SSE && SSE_REGNO_P (regno)
+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
+ return true;
+ }
+
+ /* TODO: The function should depend on current function ABI but
+ builtins.c would need updating then. Therefore we use the
+ default ABI. */
+
+  /* RAX is used as a hidden argument to va_arg functions.  */
+ if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
+ return true;
+
+ if (DEFAULT_ABI == MS_ABI)
+ parm_regs = x86_64_ms_abi_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+ for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
+ : X86_64_REGPARM_MAX); i++)
+ if (regno == parm_regs[i])
+ return true;
+ return false;
+}
+
+/* Return true if we do not know how to pass TYPE solely in registers.  */
+
+static bool
+ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
+{
+ if (must_pass_in_stack_var_size_or_pad (mode, type))
+ return true;
+
+ /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
+ The layout_type routine is crafty and tries to trick us into passing
+ currently unsupported vector types on the stack by using TImode. */
+ return (!TARGET_64BIT && mode == TImode
+ && type && TREE_CODE (type) != VECTOR_TYPE);
+}
+
+/* Return the size, in bytes, of the area reserved for arguments passed
+   in registers for the function represented by FNDECL, depending on the
+   ABI format used.  */
+int
+ix86_reg_parm_stack_space (const_tree fndecl)
+{
+ int call_abi = SYSV_ABI;
+ if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
+ call_abi = ix86_function_abi (fndecl);
+ else
+ call_abi = ix86_function_type_abi (fndecl);
+ if (call_abi == MS_ABI)
+ return 32;
+ return 0;
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call
+   ABI used.  */
+int
+ix86_function_type_abi (const_tree fntype)
+{
+ if (TARGET_64BIT && fntype != NULL)
+ {
+ int abi;
+ if (DEFAULT_ABI == SYSV_ABI)
+ abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
+ else
+ abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
+
+ return abi;
+ }
+ return DEFAULT_ABI;
+}
+
+int
+ix86_function_abi (const_tree fndecl)
+{
+ if (! fndecl)
+ return DEFAULT_ABI;
+ return ix86_function_type_abi (TREE_TYPE (fndecl));
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the call
+   ABI used.  */
+int
+ix86_cfun_abi (void)
+{
+ if (! cfun || ! TARGET_64BIT)
+ return DEFAULT_ABI;
+ return cfun->machine->call_abi;
+}
+
+/* regclass.c */
+extern void init_regs (void);
+
+/* Implementation of the call ABI switching target hook.  Set the call
+   register sets specific to FNDECL.  See also CONDITIONAL_REGISTER_USAGE
+   for more details.  */
+void
+ix86_call_abi_override (const_tree fndecl)
+{
+ if (fndecl == NULL_TREE)
+ cfun->machine->call_abi = DEFAULT_ABI;
+ else
+ cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
+}
+
+/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
+   expensive re-initialization via init_regs each time we switch function
+   context, since it is needed only during RTL expansion.  */
+static void
+ix86_maybe_switch_abi (void)
+{
+ if (TARGET_64BIT &&
+ call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
+ reinit_regs ();
+}
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+void
+init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
+ tree fntype, /* tree ptr for function decl */
+ rtx libname, /* SYMBOL_REF of library name or 0 */
+ tree fndecl)
+{
+ struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
+ memset (cum, 0, sizeof (*cum));
+
+ if (fndecl)
+ cum->call_abi = ix86_function_abi (fndecl);
+ else
+ cum->call_abi = ix86_function_type_abi (fntype);
+ /* Set up the number of registers to use for passing arguments. */
+
+ if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
+ sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
+ cum->nregs = ix86_regparm;
+ if (TARGET_64BIT)
+ {
+ if (cum->call_abi != DEFAULT_ABI)
+ cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
+ : X64_REGPARM_MAX;
+ }
+ if (TARGET_SSE)
+ {
+ cum->sse_nregs = SSE_REGPARM_MAX;
+ if (TARGET_64BIT)
+ {
+ if (cum->call_abi != DEFAULT_ABI)
+ cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX;
+ }
+ }
+ if (TARGET_MMX)
+ cum->mmx_nregs = MMX_REGPARM_MAX;
+ cum->warn_avx = true;
+ cum->warn_sse = true;
+ cum->warn_mmx = true;
+
+  /* Because the type might not match between the caller and callee, we
+     need to use the actual type of the function for local calls.
+     FIXME: cgraph_analyze can be told to actually record if the function
+     uses va_start, so for local functions maybe_vaarg can be made more
+     aggressive, helping K&R code.
+     FIXME: once the type system is fixed, we won't need this code
+     anymore.  */
+ if (i && i->local)
+ fntype = TREE_TYPE (fndecl);
+ cum->maybe_vaarg = (fntype
+ ? (!prototype_p (fntype) || stdarg_p (fntype))
+ : !libname);
+
+ if (!TARGET_64BIT)
+ {
+ /* If there are variable arguments, then we won't pass anything
+ in registers in 32-bit mode. */
+ if (stdarg_p (fntype))
+ {
+ cum->nregs = 0;
+ cum->sse_nregs = 0;
+ cum->mmx_nregs = 0;
+ cum->warn_avx = 0;
+ cum->warn_sse = 0;
+ cum->warn_mmx = 0;
+ return;
+ }
+
+ /* Use ecx and edx registers if function has fastcall attribute,
+ else look for regparm information. */
+ if (fntype)
+ {
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 2;
+ cum->fastcall = 1;
+ }
+ else
+ cum->nregs = ix86_function_regparm (fntype, fndecl);
+ }
+
+ /* Set up the number of SSE registers used for passing SFmode
+ and DFmode arguments. Warn for mismatching ABI. */
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
+ }
+}
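+
+/* E.g. in 32-bit mode, for
+
+     void __attribute__((fastcall)) f (int a, int b, int c);
+
+   CUM starts with nregs == 2, so A goes in %ecx, B in %edx, and C is
+   pushed on the stack; a variadic prototype instead zeroes all of the
+   register counts above.  */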
+
+/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
+ But in the case of vector types, it is some vector mode.
+
+ When we have only some of our vector isa extensions enabled, then there
+ are some modes for which vector_mode_supported_p is false. For these
+ modes, the generic vector support in gcc will choose some non-vector mode
+ in order to implement the type. By computing the natural mode, we'll
+ select the proper ABI location for the operand and not depend on whatever
+ the middle-end decides to do with these vector types.
+
+   The middle-end can't deal with vector types larger than 16 bytes.  In
+   that case, we return the original mode and warn about the ABI change
+   if CUM isn't NULL.  */
+
+static enum machine_mode
+type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+
+ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
+ {
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+ if ((size == 8 || size == 16 || size == 32)
+ /* ??? Generic code allows us to create width 1 vectors. Ignore. */
+ && TYPE_VECTOR_SUBPARTS (type) > 1)
+ {
+ enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
+
+ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
+ mode = MIN_MODE_VECTOR_FLOAT;
+ else
+ mode = MIN_MODE_VECTOR_INT;
+
+ /* Get the mode which has this inner mode and number of units. */
+ for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
+ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
+ && GET_MODE_INNER (mode) == innermode)
+ {
+ if (size == 32 && !TARGET_AVX)
+ {
+ static bool warnedavx;
+
+ if (cum
+ && !warnedavx
+ && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX "
+ "enabled changes the ABI");
+ }
+ return TYPE_MODE (type);
+ }
+ else
+ return mode;
+ }
+
+ gcc_unreachable ();
+ }
+ }
+
+ return mode;
+}
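+
+/* For instance, given
+
+     typedef int v4si __attribute__ ((vector_size (16)));
+
+   an argument of type v4si gets the natural mode V4SImode here even if
+   the relevant vector extension is disabled and the middle-end picked a
+   non-vector mode for the type, keeping the ABI location stable.  */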
+
+/* We want to pass a value in REGNO whose "natural" mode is MODE. However,
+ this may not agree with the mode that the type system has chosen for the
+ register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
+ go ahead and use it. Otherwise we have to build a PARALLEL instead. */
+
+static rtx
+gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
+ unsigned int regno)
+{
+ rtx tmp;
+
+ if (orig_mode != BLKmode)
+ tmp = gen_rtx_REG (orig_mode, regno);
+ else
+ {
+ tmp = gen_rtx_REG (mode, regno);
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
+ }
+
+ return tmp;
+}
+
+/* x86-64 register passing implementation.  See the x86-64 ABI for details.
+   The goal of this code is to classify each 8-byte chunk of an incoming
+   argument by register class and assign registers accordingly.  */
+
+/* Return the union class of CLASS1 and CLASS2.
+ See the x86-64 PS ABI for details. */
+
+static enum x86_64_reg_class
+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
+{
+ /* Rule #1: If both classes are equal, this is the resulting class. */
+ if (class1 == class2)
+ return class1;
+
+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
+ the other class. */
+ if (class1 == X86_64_NO_CLASS)
+ return class2;
+ if (class2 == X86_64_NO_CLASS)
+ return class1;
+
+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
+ return X86_64_INTEGERSI_CLASS;
+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
+ return X86_64_INTEGER_CLASS;
+
+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
+ MEMORY is used. */
+ if (class1 == X86_64_X87_CLASS
+ || class1 == X86_64_X87UP_CLASS
+ || class1 == X86_64_COMPLEX_X87_CLASS
+ || class2 == X86_64_X87_CLASS
+ || class2 == X86_64_X87UP_CLASS
+ || class2 == X86_64_COMPLEX_X87_CLASS)
+ return X86_64_MEMORY_CLASS;
+
+ /* Rule #6: Otherwise class SSE is used. */
+ return X86_64_SSE_CLASS;
+}
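+
+/* A worked example: for struct { int i; float f; } the single 8-byte
+   chunk contains an INTEGERSI field and an SSESF field; rule #4 merges
+   them to X86_64_INTEGERSI_CLASS, so the whole struct is passed in one
+   general-purpose register.  */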
+
+/* Classify the argument of type TYPE and mode MODE.
+ CLASSES will be filled by the register class used to pass each word
+ of the operand. The number of words is returned. In case the parameter
+ should be passed in memory, 0 is returned. As a special case for zero
+ sized containers, classes[0] will be NO_CLASS and 1 is returned.
+
+   BIT_OFFSET is used internally for handling records; it specifies the
+   offset in bits modulo 256 to avoid overflow cases.
+
+ See the x86-64 PS ABI for details.
+*/
+
+static int
+classify_argument (enum machine_mode mode, const_tree type,
+ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
+{
+ HOST_WIDE_INT bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* Variable sized entities are always passed/returned in memory. */
+ if (bytes < 0)
+ return 0;
+
+ if (mode != VOIDmode
+ && targetm.calls.must_pass_in_stack (mode, type))
+ return 0;
+
+ if (type && AGGREGATE_TYPE_P (type))
+ {
+ int i;
+ tree field;
+ enum x86_64_reg_class subclasses[MAX_CLASSES];
+
+ /* On x86-64 we pass structures larger than 32 bytes on the stack. */
+ if (bytes > 32)
+ return 0;
+
+ for (i = 0; i < words; i++)
+ classes[i] = X86_64_NO_CLASS;
+
+      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
+	 signal the memory class, so handle this as a special case.  */
+ if (!words)
+ {
+ classes[0] = X86_64_NO_CLASS;
+ return 1;
+ }
+
+ /* Classify each field of record and merge classes. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+	/* And now merge the fields of the structure.  */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ /* Bitfields are always classified as integer. Handle them
+ early, since later code would consider them to be
+ misaligned integers. */
+ if (DECL_BIT_FIELD (field))
+ {
+ for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ i < ((int_bit_position (field) + (bit_offset % 64))
+ + tree_low_cst (DECL_SIZE (field), 0)
+ + 63) / 8 / 8; i++)
+ classes[i] =
+ merge_classes (X86_64_INTEGER_CLASS,
+ classes[i]);
+ }
+ else
+ {
+ type = TREE_TYPE (field);
+
+ /* Flexible array member is ignored. */
+ if (TYPE_MODE (type) == BLKmode
+ && TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_SIZE (type) == NULL_TREE
+ && TYPE_DOMAIN (type) != NULL_TREE
+ && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
+ == NULL_TREE))
+ {
+ static bool warned;
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI of passing struct with"
+ " a flexible array member has"
+ " changed in GCC 4.4");
+ }
+ continue;
+ }
+ num = classify_argument (TYPE_MODE (type), type,
+ subclasses,
+ (int_bit_position (field)
+ + bit_offset) % 256);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ int pos =
+ (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
+ }
+ }
+ }
+ }
+ break;
+
+ case ARRAY_TYPE:
+ /* Arrays are handled as small records. */
+ {
+ int num;
+ num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
+ TREE_TYPE (type), subclasses, bit_offset);
+ if (!num)
+ return 0;
+
+ /* The partial classes are now full classes. */
+ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
+ subclasses[0] = X86_64_SSE_CLASS;
+ if (subclasses[0] == X86_64_INTEGERSI_CLASS
+ && !((bit_offset % 64) == 0 && bytes == 4))
+ subclasses[0] = X86_64_INTEGER_CLASS;
+
+ for (i = 0; i < words; i++)
+ classes[i] = subclasses[i % num];
+
+ break;
+ }
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ /* Unions are similar to RECORD_TYPE but the offset is always 0. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL)
+ {
+ int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
+ TREE_TYPE (field), subclasses,
+ bit_offset);
+ if (!num)
+ return 0;
+ for (i = 0; i < num; i++)
+ classes[i] = merge_classes (subclasses[i], classes[i]);
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (words > 2)
+ {
+ /* When the size exceeds 16 bytes, everything is passed in
+ memory unless the first class is X86_64_SSE_CLASS and
+ all the remaining classes are X86_64_SSEUP_CLASS. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
+
+ /* Final merger cleanup. */
+ for (i = 0; i < words; i++)
+ {
+ /* If one class is MEMORY, everything should be passed in
+ memory. */
+ if (classes[i] == X86_64_MEMORY_CLASS)
+ return 0;
+
+ /* X86_64_SSEUP_CLASS should always be preceded by
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
+ if (classes[i] == X86_64_SSEUP_CLASS
+ && classes[i - 1] != X86_64_SSE_CLASS
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ gcc_assert (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
+
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
+ if (classes[i] == X86_64_X87UP_CLASS
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ static bool warned;
+
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ gcc_assert (i != 0);
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI of passing union with long double"
+ " has changed in GCC 4.4");
+ }
+ return 0;
+ }
+ }
+ return words;
+ }
+
+ /* Compute the alignment needed. We align all types to their natural
+ boundaries, with the exception of XFmode, which is aligned to 64 bits. */
+ if (mode != VOIDmode && mode != BLKmode)
+ {
+ int mode_alignment = GET_MODE_BITSIZE (mode);
+
+ if (mode == XFmode)
+ mode_alignment = 128;
+ else if (mode == XCmode)
+ mode_alignment = 256;
+ if (COMPLEX_MODE_P (mode))
+ mode_alignment /= 2;
+ /* Misaligned fields are always returned in memory. */
+ if (bit_offset % mode_alignment)
+ return 0;
+ }
+
+ /* For V1xx modes, just use the base mode. */
+ if (VECTOR_MODE_P (mode) && mode != V1DImode
+ && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
+ mode = GET_MODE_INNER (mode);
+
+ /* Classification of atomic types. */
+ switch (mode)
+ {
+ case SDmode:
+ case DDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case TDmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ case CSImode:
+ case CHImode:
+ case CQImode:
+ {
+ int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
+
+ if (size <= 32)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 64)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+ else if (size <= 64+32)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+ else if (size <= 64+64)
+ {
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ }
+ else
+ gcc_unreachable ();
+ }
+ case CDImode:
+ case TImode:
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ case COImode:
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+ case CTImode:
+ return 0;
+ case SFmode:
+ if (!(bit_offset % 64))
+ classes[0] = X86_64_SSESF_CLASS;
+ else
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case DFmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ return 1;
+ case XFmode:
+ classes[0] = X86_64_X87_CLASS;
+ classes[1] = X86_64_X87UP_CLASS;
+ return 2;
+ case TFmode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case SCmode:
+ classes[0] = X86_64_SSE_CLASS;
+ if (!(bit_offset % 64))
+ return 1;
+ else
+ {
+ static bool warned;
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI of passing structure with complex float"
+ " member has changed in GCC 4.4");
+ }
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
+ case DCmode:
+ classes[0] = X86_64_SSEDF_CLASS;
+ classes[1] = X86_64_SSEDF_CLASS;
+ return 2;
+ case XCmode:
+ classes[0] = X86_64_COMPLEX_X87_CLASS;
+ return 1;
+ case TCmode:
+ /* This mode is larger than 16 bytes. */
+ return 0;
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ classes[2] = X86_64_SSEUP_CLASS;
+ classes[3] = X86_64_SSEUP_CLASS;
+ return 4;
+ case V4SFmode:
+ case V4SImode:
+ case V16QImode:
+ case V8HImode:
+ case V2DFmode:
+ case V2DImode:
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ return 2;
+ case V1DImode:
+ case V2SFmode:
+ case V2SImode:
+ case V4HImode:
+ case V8QImode:
+ classes[0] = X86_64_SSE_CLASS;
+ return 1;
+ case BLKmode:
+ case VOIDmode:
+ return 0;
+ default:
+ gcc_assert (VECTOR_MODE_P (mode));
+
+ if (bytes > 16)
+ return 0;
+
+ gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
+
+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ else
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGER_CLASS;
+ return 1 + (bytes > 8);
+ }
+}
+
+/* Examine the argument and set the number of registers required in each
+ class. Return 0 iff the parameter should be passed in memory. */
+static int
+examine_argument (enum machine_mode mode, const_tree type, int in_return,
+ int *int_nregs, int *sse_nregs)
+{
+ enum x86_64_reg_class regclass[MAX_CLASSES];
+ int n = classify_argument (mode, type, regclass, 0);
+
+ *int_nregs = 0;
+ *sse_nregs = 0;
+ if (!n)
+ return 0;
+ for (n--; n >= 0; n--)
+ switch (regclass[n])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ (*int_nregs)++;
+ break;
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ (*sse_nregs)++;
+ break;
+ case X86_64_NO_CLASS:
+ case X86_64_SSEUP_CLASS:
+ break;
+ case X86_64_X87_CLASS:
+ case X86_64_X87UP_CLASS:
+ if (!in_return)
+ return 0;
+ break;
+ case X86_64_COMPLEX_X87_CLASS:
+ return in_return ? 2 : 0;
+ case X86_64_MEMORY_CLASS:
+ gcc_unreachable ();
+ }
+ return 1;
+}
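+
+/* For example, for struct { double d; int i; } this sets *sse_nregs
+ to 1 and *int_nregs to 1, while a long double argument yields 0
+ (memory), since the X87 classes are only allowed for return values. */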
+
+/* Construct container for the argument used by GCC interface. See
+ FUNCTION_ARG for the detailed description. */
+
+static rtx
+construct_container (enum machine_mode mode, enum machine_mode orig_mode,
+ const_tree type, int in_return, int nintregs, int nsseregs,
+ const int *intreg, int sse_regno)
+{
+ /* The following variables hold the static issued_error state. */
+ static bool issued_sse_arg_error;
+ static bool issued_sse_ret_error;
+ static bool issued_x87_ret_error;
+
+ enum machine_mode tmpmode;
+ int bytes =
+ (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
+ enum x86_64_reg_class regclass[MAX_CLASSES];
+ int n;
+ int i;
+ int nexps = 0;
+ int needed_sseregs, needed_intregs;
+ rtx exp[MAX_CLASSES];
+ rtx ret;
+
+ n = classify_argument (mode, type, regclass, 0);
+ if (!n)
+ return NULL;
+ if (!examine_argument (mode, type, in_return, &needed_intregs,
+ &needed_sseregs))
+ return NULL;
+ if (needed_intregs > nintregs || needed_sseregs > nsseregs)
+ return NULL;
+
+ /* We allowed the user to turn off SSE for kernel mode. Don't crash if
+ some less clueful developer tries to use floating-point anyway. */
+ if (needed_sseregs && !TARGET_SSE)
+ {
+ if (in_return)
+ {
+ if (!issued_sse_ret_error)
+ {
+ error ("SSE register return with SSE disabled");
+ issued_sse_ret_error = true;
+ }
+ }
+ else if (!issued_sse_arg_error)
+ {
+ error ("SSE register argument with SSE disabled");
+ issued_sse_arg_error = true;
+ }
+ return NULL;
+ }
+
+ /* Likewise, error if the ABI requires us to return values in the
+ x87 registers and the user specified -mno-80387. */
+ if (!TARGET_80387 && in_return)
+ for (i = 0; i < n; i++)
+ if (regclass[i] == X86_64_X87_CLASS
+ || regclass[i] == X86_64_X87UP_CLASS
+ || regclass[i] == X86_64_COMPLEX_X87_CLASS)
+ {
+ if (!issued_x87_ret_error)
+ {
+ error ("x87 register return with x87 disabled");
+ issued_x87_ret_error = true;
+ }
+ return NULL;
+ }
+
+ /* First construct the simple cases. Avoid SCmode, since we want to use
+ a single register to pass this type. */
+ if (n == 1 && mode != SCmode)
+ switch (regclass[0])
+ {
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ return gen_rtx_REG (mode, intreg[0]);
+ case X86_64_SSE_CLASS:
+ case X86_64_SSESF_CLASS:
+ case X86_64_SSEDF_CLASS:
+ return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
+ case X86_64_X87_CLASS:
+ case X86_64_COMPLEX_X87_CLASS:
+ return gen_rtx_REG (mode, FIRST_STACK_REG);
+ case X86_64_NO_CLASS:
+ /* Zero sized array, struct or class. */
+ return NULL;
+ default:
+ gcc_unreachable ();
+ }
+ if (n == 2 && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+ if (n == 4
+ && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+
+ if (n == 2
+ && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
+ return gen_rtx_REG (XFmode, FIRST_STACK_REG);
+ if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
+ && regclass[1] == X86_64_INTEGER_CLASS
+ && (mode == CDImode || mode == TImode || mode == TFmode)
+ && intreg[0] + 1 == intreg[1])
+ return gen_rtx_REG (mode, intreg[0]);
+
+ /* Otherwise figure out the entries of the PARALLEL. */
+ for (i = 0; i < n; i++)
+ {
+ int pos;
+
+ switch (regclass[i])
+ {
+ case X86_64_NO_CLASS:
+ break;
+ case X86_64_INTEGER_CLASS:
+ case X86_64_INTEGERSI_CLASS:
+ /* Merge TImodes on aligned occasions here too. */
+ if (i * 8 + 8 > bytes)
+ tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
+ else if (regclass[i] == X86_64_INTEGERSI_CLASS)
+ tmpmode = SImode;
+ else
+ tmpmode = DImode;
+ /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
+ if (tmpmode == BLKmode)
+ tmpmode = DImode;
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode, *intreg),
+ GEN_INT (i*8));
+ intreg++;
+ break;
+ case X86_64_SSESF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSEDF_CLASS:
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (DFmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (i*8));
+ sse_regno++;
+ break;
+ case X86_64_SSE_CLASS:
+ pos = i;
+ switch (n)
+ {
+ case 1:
+ tmpmode = DImode;
+ break;
+ case 2:
+ if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
+ {
+ tmpmode = TImode;
+ i++;
+ }
+ else
+ tmpmode = DImode;
+ break;
+ case 4:
+ gcc_assert (i == 0
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS);
+ tmpmode = OImode;
+ i += 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (tmpmode,
+ SSE_REGNO (sse_regno)),
+ GEN_INT (pos*8));
+ sse_regno++;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Empty aligned struct, union or class. */
+ if (nexps == 0)
+ return NULL;
+
+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
+ for (i = 0; i < nexps; i++)
+ XVECEXP (ret, 0, i) = exp [i];
+ return ret;
+}
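+
+/* As a sketch (assuming the first argument slots are still free), for
+ struct { double d; int i; } this builds roughly
+
+ (parallel [(expr_list (reg:DF xmm0) (const_int 0))
+ (expr_list (reg:SI di) (const_int 8))])
+
+ i.e. the SSEDF eightbyte in %xmm0 and the INTEGERSI eightbyte in the
+ low half of %rdi. */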
+
+/* Update the data in CUM to advance over an argument of mode MODE
+ and data type TYPE. (TYPE is null for libcalls where that information
+ may not be available.) */
+
+static void
+function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
+{
+ switch (mode)
+ {
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ cum->words += words;
+ cum->nregs -= words;
+ cum->regno += words;
+
+ if (cum->nregs <= 0)
+ {
+ cum->nregs = 0;
+ cum->regno = 0;
+ }
+ break;
+
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ case TImode:
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->sse_words += words;
+ cum->sse_nregs -= 1;
+ cum->sse_regno += 1;
+ if (cum->sse_nregs <= 0)
+ {
+ cum->sse_nregs = 0;
+ cum->sse_regno = 0;
+ }
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ case V1DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ cum->mmx_words += words;
+ cum->mmx_nregs -= 1;
+ cum->mmx_regno += 1;
+ if (cum->mmx_nregs <= 0)
+ {
+ cum->mmx_nregs = 0;
+ cum->mmx_regno = 0;
+ }
+ }
+ break;
+ }
+}
+
+static void
+function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, HOST_WIDE_INT words, int named)
+{
+ int int_nregs, sse_nregs;
+
+ /* Unnamed 256-bit vector mode parameters are passed on the stack. */
+ if (!named && VALID_AVX256_REG_MODE (mode))
+ return;
+
+ if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
+ cum->words += words;
+ else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
+ {
+ cum->nregs -= int_nregs;
+ cum->sse_nregs -= sse_nregs;
+ cum->regno += int_nregs;
+ cum->sse_regno += sse_nregs;
+ }
+ else
+ cum->words += words;
+}
+
+static void
+function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
+ HOST_WIDE_INT words)
+{
+ /* Otherwise, this should be passed indirect. */
+ gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
+
+ cum->words += words;
+ if (cum->nregs > 0)
+ {
+ cum->nregs -= 1;
+ cum->regno += 1;
+ }
+}
+
+void
+function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int named)
+{
+ HOST_WIDE_INT bytes, words;
+
+ if (mode == BLKmode)
+ bytes = int_size_in_bytes (type);
+ else
+ bytes = GET_MODE_SIZE (mode);
+ words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ if (type)
+ mode = type_natural_mode (type, NULL);
+
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ function_arg_advance_ms_64 (cum, bytes, words);
+ else if (TARGET_64BIT)
+ function_arg_advance_64 (cum, mode, type, words, named);
+ else
+ function_arg_advance_32 (cum, mode, type, bytes, words);
+}
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+static rtx
+function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, tree type,
+ HOST_WIDE_INT bytes, HOST_WIDE_INT words)
+{
+ static bool warnedsse, warnedmmx;
+
+ /* Avoid the AL settings for the Unix64 ABI. */
+ if (mode == VOIDmode)
+ return constm1_rtx;
+
+ switch (mode)
+ {
+ default:
+ break;
+
+ case BLKmode:
+ if (bytes < 0)
+ break;
+ /* FALLTHRU */
+ case DImode:
+ case SImode:
+ case HImode:
+ case QImode:
+ if (words <= cum->nregs)
+ {
+ int regno = cum->regno;
+
+ /* Fastcall allocates the first two DWORD-sized (SImode) or
+ smaller arguments to ECX and EDX, as long as the argument
+ is not an aggregate type. */
+ if (cum->fastcall)
+ {
+ if (mode == BLKmode
+ || mode == DImode
+ || (type && AGGREGATE_TYPE_P (type)))
+ break;
+
+ /* ECX, not EAX, is the first allocated register. */
+ if (regno == AX_REG)
+ regno = CX_REG;
+ }
+ return gen_rtx_REG (mode, regno);
+ }
+ break;
+
+ case DFmode:
+ if (cum->float_in_sse < 2)
+ break;
+ case SFmode:
+ if (cum->float_in_sse < 1)
+ break;
+ /* FALLTHRU */
+ case TImode:
+ /* In 32bit, we pass TImode in xmm registers. */
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_SSE && !warnedsse && cum->warn_sse)
+ {
+ warnedsse = true;
+ warning (0, "SSE vector argument without SSE enabled "
+ "changes the ABI");
+ }
+ if (cum->sse_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (cum->sse_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->sse_regno + FIRST_SSE_REG);
+ }
+ break;
+
+ case V8QImode:
+ case V4HImode:
+ case V2SImode:
+ case V2SFmode:
+ case V1DImode:
+ if (!type || !AGGREGATE_TYPE_P (type))
+ {
+ if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector argument without MMX enabled "
+ "changes the ABI");
+ }
+ if (cum->mmx_nregs)
+ return gen_reg_or_parallel (mode, orig_mode,
+ cum->mmx_regno + FIRST_MMX_REG);
+ }
+ break;
+ }
+
+ return NULL_RTX;
+}
+
+static rtx
+function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, tree type, int named)
+{
+ /* Handle the hidden AL argument containing the number of SSE
+ registers used by varargs x86-64 functions. */
+ if (mode == VOIDmode)
+ return GEN_INT (cum->maybe_vaarg
+ ? (cum->sse_nregs < 0
+ ? (cum->call_abi == DEFAULT_ABI
+ ? SSE_REGPARM_MAX
+ : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX))
+ : cum->sse_regno)
+ : -1);
+
+ switch (mode)
+ {
+ default:
+ break;
+
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* Unnamed 256-bit vector mode parameters are passed on the stack. */
+ if (!named)
+ return NULL;
+ break;
+ }
+
+ return construct_container (mode, orig_mode, type, 0, cum->nregs,
+ cum->sse_nregs,
+ &x86_64_int_parameter_registers [cum->regno],
+ cum->sse_regno);
+}
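+
+/* The VOIDmode case above implements the psABI's hidden %al argument:
+ for a variadic call such as printf ("%f", x) the caller loads the
+ number of vector registers used (here 1) into %al before the call. */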
+
+static rtx
+function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ enum machine_mode orig_mode, int named,
+ HOST_WIDE_INT bytes)
+{
+ unsigned int regno;
+
+ /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
+ We use the value -2 to specify that the current function call is MS ABI. */
+ if (mode == VOIDmode)
+ return GEN_INT (-2);
+
+ /* If we've run out of registers, it goes on the stack. */
+ if (cum->nregs == 0)
+ return NULL_RTX;
+
+ regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
+
+ /* Only floating point modes are passed in anything but integer regs. */
+ if (TARGET_SSE && (mode == SFmode || mode == DFmode))
+ {
+ if (named)
+ regno = cum->regno + FIRST_SSE_REG;
+ else
+ {
+ rtx t1, t2;
+
+ /* Unnamed floating parameters are passed in both the
+ SSE and integer registers. */
+ t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
+ t2 = gen_rtx_REG (mode, regno);
+ t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
+ t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
+ }
+ }
+ /* Handle aggregate types passed in a register. */
+ if (orig_mode == BLKmode)
+ {
+ if (bytes > 0 && bytes <= 8)
+ mode = (bytes > 4 ? DImode : SImode);
+ if (mode == BLKmode)
+ mode = DImode;
+ }
+
+ return gen_reg_or_parallel (mode, orig_mode, regno);
+}
+
+rtx
+function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
+ tree type, int named)
+{
+ enum machine_mode mode = omode;
+ HOST_WIDE_INT bytes, words;
+
+ if (mode == BLKmode)
+ bytes = int_size_in_bytes (type);
+ else
+ bytes = GET_MODE_SIZE (mode);
+ words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ /* To simplify the code below, represent vector types with a vector mode
+ even if MMX/SSE are not active. */
+ if (type && TREE_CODE (type) == VECTOR_TYPE)
+ mode = type_natural_mode (type, cum);
+
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ return function_arg_ms_64 (cum, mode, omode, named, bytes);
+ else if (TARGET_64BIT)
+ return function_arg_64 (cum, mode, omode, type, named);
+ else
+ return function_arg_32 (cum, mode, omode, type, bytes, words);
+}
+
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type. */
+
+static bool
+ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* See Windows x64 Software Convention. */
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ {
+ int msize = (int) GET_MODE_SIZE (mode);
+ if (type)
+ {
+ /* Arrays are passed by reference. */
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ return true;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
+ are passed by reference. */
+ msize = int_size_in_bytes (type);
+ }
+ }
+
+ /* __m128 is passed by reference. */
+ switch (msize) {
+ case 1: case 2: case 4: case 8:
+ break;
+ default:
+ return true;
+ }
+ }
+ else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
+ return 1;
+
+ return 0;
+}
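+
+/* For example, under the MS ABI a 12-byte struct or a __m128 value is
+ passed by reference, while structs of exactly 1, 2, 4 or 8 bytes
+ travel directly in the integer register for their slot. */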
+
+/* Return true when TYPE should be 128bit aligned for 32bit argument passing
+ ABI. */
+static bool
+contains_aligned_value_p (tree type)
+{
+ enum machine_mode mode = TYPE_MODE (type);
+ if (((TARGET_SSE && SSE_REG_MODE_P (mode))
+ || mode == TDmode
+ || mode == TFmode
+ || mode == TCmode)
+ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
+ return true;
+ if (TYPE_ALIGN (type) < 128)
+ return false;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ /* Walk the aggregates recursively. */
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ tree field;
+
+ /* Walk all the structure fields. */
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) == FIELD_DECL
+ && contains_aligned_value_p (TREE_TYPE (field)))
+ return true;
+ }
+ break;
+ }
+
+ case ARRAY_TYPE:
+ /* Just in case some language passes arrays by value. */
+ if (contains_aligned_value_p (TREE_TYPE (type)))
+ return true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return false;
+}
+
+/* Gives the alignment boundary, in bits, of an argument with the
+ specified mode and type. */
+
+int
+ix86_function_arg_boundary (enum machine_mode mode, tree type)
+{
+ int align;
+ if (type)
+ {
+ /* Since the canonical type is used for the call, convert
+ TYPE to its canonical form if needed. */
+ if (!TYPE_STRUCTURAL_EQUALITY_P (type))
+ type = TYPE_CANONICAL (type);
+ align = TYPE_ALIGN (type);
+ }
+ else
+ align = GET_MODE_ALIGNMENT (mode);
+ if (align < PARM_BOUNDARY)
+ align = PARM_BOUNDARY;
+ /* In 32bit, only _Decimal128 and __float128 are aligned to their
+ natural boundaries. */
+ if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
+ {
+ /* i386 ABI defines all arguments to be 4 byte aligned. We have to
+ make an exception for SSE modes since these require 128bit
+ alignment.
+
+ The handling here differs from field_alignment. ICC aligns MMX
+ arguments to 4 byte boundaries, while structure fields are aligned
+ to 8 byte boundaries. */
+ if (!type)
+ {
+ if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
+ align = PARM_BOUNDARY;
+ }
+ else
+ {
+ if (!contains_aligned_value_p (type))
+ align = PARM_BOUNDARY;
+ }
+ }
+ if (align > BIGGEST_ALIGNMENT)
+ align = BIGGEST_ALIGNMENT;
+ return align;
+}
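+
+/* E.g. in 32-bit mode a plain int argument stays at PARM_BOUNDARY
+ (32 bits), while a __m128 argument is aligned to 128 bits. */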
+
+/* Return true if N is a possible register number of function value. */
+
+bool
+ix86_function_value_regno_p (int regno)
+{
+ switch (regno)
+ {
+ case 0:
+ return true;
+
+ case FIRST_FLOAT_REG:
+ /* TODO: The function should depend on the current function's ABI,
+ but builtins.c would need updating then. Therefore we use the
+ default ABI. */
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ return false;
+ return TARGET_FLOAT_RETURNS_IN_80387;
+
+ case FIRST_SSE_REG:
+ return TARGET_SSE;
+
+ case FIRST_MMX_REG:
+ if (TARGET_MACHO || TARGET_64BIT)
+ return false;
+ return TARGET_MMX;
+ }
+
+ return false;
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FUNC is its FUNCTION_DECL;
+ otherwise, FUNC is 0. */
+
+static rtx
+function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
+ const_tree fntype, const_tree fn)
+{
+ unsigned int regno;
+
+ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
+ we normally prevent this case when mmx is not available. However
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ regno = TARGET_MMX ? FIRST_MMX_REG : 0;
+
+ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
+ we prevent this case when sse is not available. However some ABIs
+ may require the result to be returned like integer TImode. */
+ else if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ regno = TARGET_SSE ? FIRST_SSE_REG : 0;
+
+ /* 32-byte vector modes in %ymm0. */
+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
+ regno = TARGET_AVX ? FIRST_SSE_REG : 0;
+
+ /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
+ else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
+ regno = FIRST_FLOAT_REG;
+ else
+ /* Most things go in %eax. */
+ regno = AX_REG;
+
+ /* Override FP return register with %xmm0 for local functions when
+ SSE math is enabled or for functions with sseregparm attribute. */
+ if ((fn || fntype) && (mode == SFmode || mode == DFmode))
+ {
+ int sse_level = ix86_function_sseregparm (fntype, fn, false);
+ if ((sse_level >= 1 && mode == SFmode)
+ || (sse_level == 2 && mode == DFmode))
+ regno = FIRST_SSE_REG;
+ }
+
+ /* OImode shouldn't be used directly. */
+ gcc_assert (mode != OImode);
+
+ return gen_rtx_REG (orig_mode, regno);
+}
+
+static rtx
+function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
+ const_tree valtype)
+{
+ rtx ret;
+
+ /* Handle libcalls, which don't provide a type node. */
+ if (valtype == NULL)
+ {
+ switch (mode)
+ {
+ case SFmode:
+ case SCmode:
+ case DFmode:
+ case DCmode:
+ case TFmode:
+ case SDmode:
+ case DDmode:
+ case TDmode:
+ return gen_rtx_REG (mode, FIRST_SSE_REG);
+ case XFmode:
+ case XCmode:
+ return gen_rtx_REG (mode, FIRST_FLOAT_REG);
+ case TCmode:
+ return NULL;
+ default:
+ return gen_rtx_REG (mode, AX_REG);
+ }
+ }
+
+ ret = construct_container (mode, orig_mode, valtype, 1,
+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
+ x86_64_int_return_registers, 0);
+
+ /* For zero sized structures, construct_container returns NULL, but we
+ need to keep the rest of the compiler happy by returning a meaningful
+ value. */
+ if (!ret)
+ ret = gen_rtx_REG (orig_mode, AX_REG);
+
+ return ret;
+}
+
+static rtx
+function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
+{
+ unsigned int regno = AX_REG;
+
+ if (TARGET_SSE)
+ {
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 16:
+ if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
+ && !COMPLEX_MODE_P (mode))
+ regno = FIRST_SSE_REG;
+ break;
+ case 8:
+ case 4:
+ if (mode == SFmode || mode == DFmode)
+ regno = FIRST_SSE_REG;
+ break;
+ default:
+ break;
+ }
+ }
+ return gen_rtx_REG (orig_mode, regno);
+}
+
+static rtx
+ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
+ enum machine_mode orig_mode, enum machine_mode mode)
+{
+ const_tree fn, fntype;
+
+ fn = NULL_TREE;
+ if (fntype_or_decl && DECL_P (fntype_or_decl))
+ fn = fntype_or_decl;
+ fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
+
+ if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
+ return function_value_ms_64 (orig_mode, mode);
+ else if (TARGET_64BIT)
+ return function_value_64 (orig_mode, mode, valtype);
+ else
+ return function_value_32 (orig_mode, mode, fntype, fn);
+}
+
+static rtx
+ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode, orig_mode;
+
+ orig_mode = TYPE_MODE (valtype);
+ mode = type_natural_mode (valtype, NULL);
+ return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
+}
+
+rtx
+ix86_libcall_value (enum machine_mode mode)
+{
+ return ix86_function_value_1 (NULL, NULL, mode, mode);
+}
+
+/* Return true iff type is returned in memory. */
+
+static int ATTRIBUTE_UNUSED
+return_in_memory_32 (const_tree type, enum machine_mode mode)
+{
+ HOST_WIDE_INT size;
+
+ if (mode == BLKmode)
+ return 1;
+
+ size = int_size_in_bytes (type);
+
+ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
+ return 0;
+
+ if (VECTOR_MODE_P (mode) || mode == TImode)
+ {
+ /* User-created vectors small enough to fit in EAX. */
+ if (size < 8)
+ return 0;
+
+ /* MMX/3dNow values are returned in MM0,
+ except when it doesn't exist. */
+ if (size == 8)
+ return (TARGET_MMX ? 0 : 1);
+
+ /* SSE values are returned in XMM0, except when it doesn't exist. */
+ if (size == 16)
+ return (TARGET_SSE ? 0 : 1);
+
+ /* AVX values are returned in YMM0, except when it doesn't exist. */
+ if (size == 32)
+ return TARGET_AVX ? 0 : 1;
+ }
+
+ if (mode == XFmode)
+ return 0;
+
+ if (size > 12)
+ return 1;
+
+ /* OImode shouldn't be used directly. */
+ gcc_assert (mode != OImode);
+
+ return 0;
+}
+
+static int ATTRIBUTE_UNUSED
+return_in_memory_64 (const_tree type, enum machine_mode mode)
+{
+ int needed_intregs, needed_sseregs;
+ return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
+}
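+
+/* E.g. a 32-byte plain struct is returned in memory (classify_argument
+ yields 0 for it), while struct { double d; int i; } is returned in
+ %xmm0 and %eax. */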
+
+static int ATTRIBUTE_UNUSED
+return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
+{
+ HOST_WIDE_INT size = int_size_in_bytes (type);
+
+ /* __m128 is returned in xmm0. */
+ if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
+ && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
+ return 0;
+
+ /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
+ return (size != 1 && size != 2 && size != 4 && size != 8);
+}
+
+static bool
+ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+#ifdef SUBTARGET_RETURN_IN_MEMORY
+ return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
+#else
+ const enum machine_mode mode = type_natural_mode (type, NULL);
+
+ if (TARGET_64BIT)
+ {
+ if (ix86_function_type_abi (fntype) == MS_ABI)
+ return return_in_memory_ms_64 (type, mode);
+ else
+ return return_in_memory_64 (type, mode);
+ }
+ else
+ return return_in_memory_32 (type, mode);
+#endif
+}
+
+/* Return true iff TYPE is returned in memory. This version is used
+ on Solaris 10. It is similar to the generic ix86_return_in_memory,
+ but differs notably in that when MMX is available, 8-byte vectors
+ are returned in memory, rather than in MMX registers. */
+
+bool
+ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ int size;
+ enum machine_mode mode = type_natural_mode (type, NULL);
+
+ if (TARGET_64BIT)
+ return return_in_memory_64 (type, mode);
+
+ if (mode == BLKmode)
+ return 1;
+
+ size = int_size_in_bytes (type);
+
+ if (VECTOR_MODE_P (mode))
+ {
+ /* Return in memory only if MMX registers *are* available. This
+ seems backwards, but it is consistent with the existing
+ Solaris x86 ABI. */
+ if (size == 8)
+ return TARGET_MMX;
+ if (size == 16)
+ return !TARGET_SSE;
+ }
+ else if (mode == TImode)
+ return !TARGET_SSE;
+ else if (mode == XFmode)
+ return 0;
+
+ return size > 12;
+}
+
+/* When returning SSE vector types, we have a choice of either
+ (1) being ABI-incompatible with a -march switch, or
+ (2) generating an error.
+ Given no good solution, I think the safest thing is one warning.
+ The user won't be able to use -Werror, but....
+
+ Choose the STRUCT_VALUE_RTX hook because that's (at present) only
+ called in response to actually generating a caller or callee that
+ uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
+ via aggregate_value_p for general type probing from tree-ssa. */
+
+static rtx
+ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
+{
+ static bool warnedsse, warnedmmx;
+
+ if (!TARGET_64BIT && type)
+ {
+ /* Look at the return type of the function, not the function type. */
+ enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
+
+ if (!TARGET_SSE && !warnedsse)
+ {
+ if (mode == TImode
+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
+ {
+ warnedsse = true;
+ warning (0, "SSE vector return without SSE enabled "
+ "changes the ABI");
+ }
+ }
+
+ if (!TARGET_MMX && !warnedmmx)
+ {
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ {
+ warnedmmx = true;
+ warning (0, "MMX vector return without MMX enabled "
+ "changes the ABI");
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+/* Create the va_list data type. */
+
+/* Returns the calling-convention-specific va_list data type.
+ The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
+
+static tree
+ix86_build_builtin_va_list_abi (enum calling_abi abi)
+{
+ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
+
+ /* For i386 we use a plain pointer to the argument area. */
+ if (!TARGET_64BIT || abi == MS_ABI)
+ return build_pointer_type (char_type_node);
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
+ unsigned_type_node);
+ f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
+ unsigned_type_node);
+ f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
+ ptr_type_node);
+ f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
+ ptr_type_node);
+
+ va_list_gpr_counter_field = f_gpr;
+ va_list_fpr_counter_field = f_fpr;
+
+ DECL_FIELD_CONTEXT (f_gpr) = record;
+ DECL_FIELD_CONTEXT (f_fpr) = record;
+ DECL_FIELD_CONTEXT (f_ovf) = record;
+ DECL_FIELD_CONTEXT (f_sav) = record;
+
+ TREE_CHAIN (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_gpr;
+ TREE_CHAIN (f_gpr) = f_fpr;
+ TREE_CHAIN (f_fpr) = f_ovf;
+ TREE_CHAIN (f_ovf) = f_sav;
+
+ layout_type (record);
+
+ /* The correct type is an array type of one element. */
+ return build_array_type (record, build_index_type (size_zero_node));
+}
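+
+/* For reference, the record built above corresponds to the psABI's
+ va_list layout:
+
+ typedef struct {
+ unsigned int gp_offset;
+ unsigned int fp_offset;
+ void *overflow_arg_area;
+ void *reg_save_area;
+ } __va_list_tag;
+ typedef __va_list_tag __builtin_va_list[1];
+*/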
+
+/* Set up the builtin va_list data type and, for 64-bit, the additional
+ calling-convention-specific va_list data types. */
+
+static tree
+ix86_build_builtin_va_list (void)
+{
+ tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
+
+ /* Initialize the ABI-specific va_list builtin types. */
+ if (TARGET_64BIT)
+ {
+ tree t;
+ if (DEFAULT_ABI == MS_ABI)
+ {
+ t = ix86_build_builtin_va_list_abi (SYSV_ABI);
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ sysv_va_list_type_node = t;
+ }
+ else
+ {
+ t = ret;
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ sysv_va_list_type_node = t;
+ }
+ if (DEFAULT_ABI != MS_ABI)
+ {
+ t = ix86_build_builtin_va_list_abi (MS_ABI);
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ ms_va_list_type_node = t;
+ }
+ else
+ {
+ t = ret;
+ if (TREE_CODE (t) != RECORD_TYPE)
+ t = build_variant_type_copy (t);
+ ms_va_list_type_node = t;
+ }
+ }
+
+ return ret;
+}
+
+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
+
+static void
+setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
+{
+ rtx save_area, mem;
+ rtx label;
+ rtx label_ref;
+ rtx tmp_reg;
+ rtx nsse_reg;
+ alias_set_type set;
+ int i;
+ int regparm = ix86_regparm;
+
+ if (cum->call_abi != DEFAULT_ABI)
+ regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
+
+ /* GPR size of varargs save area. */
+ if (cfun->va_list_gpr_size)
+ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
+ else
+ ix86_varargs_gpr_size = 0;
+
+ /* FPR size of varargs save area. We don't need it if we don't pass
+ anything in SSE registers. */
+ if (cum->sse_nregs && cfun->va_list_fpr_size)
+ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+ else
+ ix86_varargs_fpr_size = 0;
+
+ if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
+ return;
+
+ save_area = frame_pointer_rtx;
+ set = get_varargs_alias_set ();
+
+ for (i = cum->regno;
+ i < regparm
+ && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
+ i++)
+ {
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (save_area, i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ emit_move_insn (mem, gen_rtx_REG (Pmode,
+ x86_64_int_parameter_registers[i]));
+ }
+
+ if (ix86_varargs_fpr_size)
+ {
+ /* Now emit code to save the SSE registers. The AX parameter contains
+ the number of SSE parameter registers used to call this function. We
+ use the sse_prologue_save insn template, which produces a computed
+ jump across the SSE saves. Some preparation work is needed to get
+ this working. */
+
+ label = gen_label_rtx ();
+ label_ref = gen_rtx_LABEL_REF (Pmode, label);
+
+ /* Compute the address to jump to:
+ label - eax*4 + nnamed_sse_arguments*4, or
+ label - eax*5 + nnamed_sse_arguments*5 for AVX. */
+ tmp_reg = gen_reg_rtx (Pmode);
+ nsse_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_MULT (Pmode, nsse_reg,
+ GEN_INT (4))));
+
+ /* vmovaps is one byte longer than movaps. */
+ if (TARGET_AVX)
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ gen_rtx_PLUS (Pmode, tmp_reg,
+ nsse_reg)));
+
+ if (cum->sse_regno)
+ emit_move_insn
+ (nsse_reg,
+ gen_rtx_CONST (DImode,
+ gen_rtx_PLUS (DImode,
+ label_ref,
+ GEN_INT (cum->sse_regno
+ * (TARGET_AVX ? 5 : 4)))));
+ else
+ emit_move_insn (nsse_reg, label_ref);
+ emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
+
+ /* Compute the address of the memory block we save into. We always use
+ a pointer pointing 127 bytes after the first byte to store - this
+ keeps the instruction size limited to 4 bytes (5 bytes for AVX) with
+ a one-byte displacement. */
+ tmp_reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
+ plus_constant (save_area,
+ ix86_varargs_gpr_size + 127)));
+ mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+ set_mem_align (mem, BITS_PER_WORD);
+
+ /* And finally do the dirty job! */
+ emit_insn (gen_sse_prologue_save (mem, nsse_reg,
+ GEN_INT (cum->sse_regno), label));
+ }
+}
+
+static void
+setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
+{
+ alias_set_type set = get_varargs_alias_set ();
+ int i;
+
+ for (i = cum->regno; i < X64_REGPARM_MAX; i++)
+ {
+ rtx reg, mem;
+
+ mem = gen_rtx_MEM (Pmode,
+ plus_constant (virtual_incoming_args_rtx,
+ i * UNITS_PER_WORD));
+ MEM_NOTRAP_P (mem) = 1;
+ set_mem_alias_set (mem, set);
+
+ reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
+ emit_move_insn (mem, reg);
+ }
+}
+
+static void
+ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int *pretend_size ATTRIBUTE_UNUSED,
+ int no_rtl)
+{
+ CUMULATIVE_ARGS next_cum;
+ tree fntype;
+
+ /* This argument doesn't appear to be used anymore. Which is good,
+ because the old code here didn't suppress rtl generation. */
+ gcc_assert (!no_rtl);
+
+ if (!TARGET_64BIT)
+ return;
+
+ fntype = TREE_TYPE (current_function_decl);
+
+ /* For varargs, we do not want to skip the dummy va_dcl argument.
+ For stdargs, we do want to skip the last named argument. */
+ next_cum = *cum;
+ if (stdarg_p (fntype))
+ function_arg_advance (&next_cum, mode, type, 1);
+
+ if (cum->call_abi == MS_ABI)
+ setup_incoming_varargs_ms_64 (&next_cum);
+ else
+ setup_incoming_varargs_64 (&next_cum);
+}
+
+/* Checks whether TYPE is the char * kind of va_list. */
+
+static bool
+is_va_list_char_pointer (tree type)
+{
+ tree canonic;
+
+ /* For 32-bit it is always true. */
+ if (!TARGET_64BIT)
+ return true;
+ canonic = ix86_canonical_va_list_type (type);
+ return (canonic == ms_va_list_type_node
+ || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
+}
+
+/* Implement va_start. */
+
+static void
+ix86_va_start (tree valist, rtx nextarg)
+{
+ HOST_WIDE_INT words, n_gpr, n_fpr;
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ tree type;
+
+ /* Only the 64-bit target needs something special. */
+ if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ /* Count the number of GP and FP argument registers used. */
+ words = crtl->args.info.words;
+ n_gpr = crtl->args.info.regno;
+ n_fpr = crtl->args.info.sse_regno;
+
+ if (cfun->va_list_gpr_size)
+ {
+ type = TREE_TYPE (gpr);
+ t = build2 (MODIFY_EXPR, type,
+ gpr, build_int_cst (type, n_gpr * 8));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ if (TARGET_SSE && cfun->va_list_fpr_size)
+ {
+ type = TREE_TYPE (fpr);
+ t = build2 (MODIFY_EXPR, type, fpr,
+ build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+
+ /* Find the overflow area. */
+ type = TREE_TYPE (ovf);
+ t = make_tree (type, crtl->args.internal_arg_pointer);
+ if (words != 0)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, type, ovf, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
+ {
+ /* Find the register save area.
+ The function prologue saves it right above the stack frame. */
+ type = TREE_TYPE (sav);
+ t = make_tree (type, frame_pointer_rtx);
+ if (!ix86_varargs_gpr_size)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (-8 * X86_64_REGPARM_MAX));
+ t = build2 (MODIFY_EXPR, type, sav, t);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+ }
+}
+
+/* Implement va_arg. */
+
+static tree
+ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p)
+{
+ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
+ tree f_gpr, f_fpr, f_ovf, f_sav;
+ tree gpr, fpr, ovf, sav, t;
+ int size, rsize;
+ tree lab_false, lab_over = NULL_TREE;
+ tree addr, t2;
+ rtx container;
+ int indirect_p = 0;
+ tree ptrtype;
+ enum machine_mode nat_mode;
+ int arg_boundary;
+
+ /* Only the 64-bit target needs something special. */
+ if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
+
+ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
+ f_fpr = TREE_CHAIN (f_gpr);
+ f_ovf = TREE_CHAIN (f_fpr);
+ f_sav = TREE_CHAIN (f_ovf);
+
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
+ build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
+ valist = build_va_arg_indirect_ref (valist);
+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
+
+ indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
+ if (indirect_p)
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+
+ nat_mode = type_natural_mode (type, NULL);
+ switch (nat_mode)
+ {
+ case V8SFmode:
+ case V8SImode:
+ case V32QImode:
+ case V16HImode:
+ case V4DFmode:
+ case V4DImode:
+ /* Unnamed 256-bit vector mode parameters are passed on the stack. */
+ if (ix86_cfun_abi () == SYSV_ABI)
+ {
+ container = NULL;
+ break;
+ }
+
+ default:
+ container = construct_container (nat_mode, TYPE_MODE (type),
+ type, 0, X86_64_REGPARM_MAX,
+ X86_64_SSE_REGPARM_MAX, intreg,
+ 0);
+ break;
+ }
+
+ /* Pull the value out of the saved registers. */
+
+ addr = create_tmp_var (ptr_type_node, "addr");
+ DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
+
+ if (container)
+ {
+ int needed_intregs, needed_sseregs;
+ bool need_temp;
+ tree int_addr, sse_addr;
+
+ lab_false = create_artificial_label ();
+ lab_over = create_artificial_label ();
+
+ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
+
+ need_temp = (!REG_P (container)
+ && ((needed_intregs && TYPE_ALIGN (type) > 64)
+ || TYPE_ALIGN (type) > 128));
+
+ /* When passing a structure, verify that it forms a consecutive block
+ in the register save area. If not, we need to do moves. */
+ if (!need_temp && !REG_P (container))
+ {
+ /* Verify that all registers are strictly consecutive. */
+ if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 16)
+ need_temp = 1;
+ }
+ }
+ else
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ if (REGNO (XEXP (slot, 0)) != (unsigned int) i
+ || INTVAL (XEXP (slot, 1)) != i * 8)
+ need_temp = 1;
+ }
+ }
+ }
+ if (!need_temp)
+ {
+ int_addr = addr;
+ sse_addr = addr;
+ }
+ else
+ {
+ int_addr = create_tmp_var (ptr_type_node, "int_addr");
+ DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
+ sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
+ DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
+ }
+
+ /* First ensure that we fit completely in registers. */
+ if (needed_intregs)
+ {
+ t = build_int_cst (TREE_TYPE (gpr),
+ (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
+ t = build2 (GE_EXPR, boolean_type_node, gpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ t = build_int_cst (TREE_TYPE (fpr),
+ (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
+ + X86_64_REGPARM_MAX * 8);
+ t = build2 (GE_EXPR, boolean_type_node, fpr, t);
+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
+ gimplify_and_add (t, pre_p);
+ }
+
+ /* Compute index to start of area used for integer regs. */
+ if (needed_intregs)
+ {
+ /* int_addr = gpr + sav; */
+ t = fold_convert (sizetype, gpr);
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ gimplify_assign (int_addr, t, pre_p);
+ }
+ if (needed_sseregs)
+ {
+ /* sse_addr = fpr + sav; */
+ t = fold_convert (sizetype, fpr);
+ t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ gimplify_assign (sse_addr, t, pre_p);
+ }
+ if (need_temp)
+ {
+ int i;
+ tree temp = create_tmp_var (type, "va_arg_tmp");
+
+ /* addr = &temp; */
+ t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
+ gimplify_assign (addr, t, pre_p);
+
+ for (i = 0; i < XVECLEN (container, 0); i++)
+ {
+ rtx slot = XVECEXP (container, 0, i);
+ rtx reg = XEXP (slot, 0);
+ enum machine_mode mode = GET_MODE (reg);
+ tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
+ tree addr_type = build_pointer_type (piece_type);
+ tree daddr_type = build_pointer_type_for_mode (piece_type,
+ ptr_mode, true);
+ tree src_addr, src;
+ int src_offset;
+ tree dest_addr, dest;
+
+ if (SSE_REGNO_P (REGNO (reg)))
+ {
+ src_addr = sse_addr;
+ src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
+ }
+ else
+ {
+ src_addr = int_addr;
+ src_offset = REGNO (reg) * 8;
+ }
+ src_addr = fold_convert (addr_type, src_addr);
+ src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
+ size_int (src_offset));
+ src = build_va_arg_indirect_ref (src_addr);
+
+ dest_addr = fold_convert (daddr_type, addr);
+ dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
+ size_int (INTVAL (XEXP (slot, 1))));
+ dest = build_va_arg_indirect_ref (dest_addr);
+
+ gimplify_assign (dest, src, pre_p);
+ }
+ }
+
+ if (needed_intregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
+ build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
+ gimplify_assign (gpr, t, pre_p);
+ }
+
+ if (needed_sseregs)
+ {
+ t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
+ build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
+ gimplify_assign (fpr, t, pre_p);
+ }
+
+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
+
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
+ }
+
+ /* ... otherwise out of the overflow area. */
+
+ /* When we align a parameter on the stack for the caller, a parameter
+ alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
+ MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
+ caller. */
+ arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
+ if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
+ arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
+
+ /* Care for on-stack alignment if needed. */
+ if (arg_boundary <= 64
+ || integer_zerop (TYPE_SIZE (type)))
+ t = ovf;
+ else
+ {
+ HOST_WIDE_INT align = arg_boundary / 8;
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
+ size_int (align - 1));
+ t = fold_convert (sizetype, t);
+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
+ size_int (-align));
+ t = fold_convert (TREE_TYPE (ovf), t);
+ }
+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
+ gimplify_assign (addr, t, pre_p);
+
+ t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
+ size_int (rsize * UNITS_PER_WORD));
+ gimplify_assign (unshare_expr (ovf), t, pre_p);
+
+ if (container)
+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
+
+ ptrtype = build_pointer_type (type);
+ addr = fold_convert (ptrtype, addr);
+
+ if (indirect_p)
+ addr = build_va_arg_indirect_ref (addr);
+ return build_va_arg_indirect_ref (addr);
+}
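+
+/* Roughly, for a double argument the sequence gimplified above behaves
+ like the psABI's va_arg algorithm:
+
+ if (ap->fp_offset >= 48 + 8 * 16) // SSE save area exhausted
+ goto overflow;
+ addr = ap->reg_save_area + ap->fp_offset;
+ ap->fp_offset += 16;
+ goto done;
+ overflow:
+ addr = ap->overflow_arg_area; // already 8-byte aligned
+ ap->overflow_arg_area += 8;
+ done:
+ result = *(double *) addr;
+*/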
+
+/* Return nonzero if OPNUM's MEM should be matched
+ in movabs* patterns. */
+
+int
+ix86_check_movabs (rtx insn, int opnum)
+{
+ rtx set, mem;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ mem = XEXP (set, opnum);
+ while (GET_CODE (mem) == SUBREG)
+ mem = SUBREG_REG (mem);
+ gcc_assert (MEM_P (mem));
+ return (volatile_ok || !MEM_VOLATILE_P (mem));
+}
+
+/* Initialize the table of extra 80387 mathematical constants. */
+
+static void
+init_ext_80387_constants (void)
+{
+ static const char * cst[5] =
+ {
+ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
+ "0.6931471805599453094286904741849753009", /* 1: fldln2 */
+ "1.4426950408889634073876517827983434472", /* 2: fldl2e */
+ "3.3219280948873623478083405569094566090", /* 3: fldl2t */
+ "3.1415926535897932385128089594061862044", /* 4: fldpi */
+ };
+ int i;
+
+ for (i = 0; i < 5; i++)
+ {
+ real_from_string (&ext_80387_constants_table[i], cst[i]);
+ /* Ensure each constant is rounded to XFmode precision. */
+ real_convert (&ext_80387_constants_table[i],
+ XFmode, &ext_80387_constants_table[i]);
+ }
+
+ ext_80387_constants_init = 1;
+}
+
+/* Return true if the constant is something that can be loaded with
+ a special instruction. */
+
+int
+standard_80387_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ REAL_VALUE_TYPE r;
+
+ if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
+ return -1;
+
+ if (x == CONST0_RTX (mode))
+ return 1;
+ if (x == CONST1_RTX (mode))
+ return 2;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+ /* For XFmode constants, try to find a special 80387 instruction when
+ optimizing for size or on those CPUs that benefit from them. */
+ if (mode == XFmode
+ && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
+ {
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ for (i = 0; i < 5; i++)
+ if (real_identical (&r, &ext_80387_constants_table[i]))
+ return i + 3;
+ }
+
+ /* A load of the constant -0.0 or -1.0 will be split into an
+ fldz;fchs or fld1;fchs sequence. */
+ if (real_isnegzero (&r))
+ return 8;
+ if (real_identical (&r, &dconstm1))
+ return 9;
+
+ return 0;
+}
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_80387_constant_opcode (rtx x)
+{
+ switch (standard_80387_constant_p (x))
+ {
+ case 1:
+ return "fldz";
+ case 2:
+ return "fld1";
+ case 3:
+ return "fldlg2";
+ case 4:
+ return "fldln2";
+ case 5:
+ return "fldl2e";
+ case 6:
+ return "fldl2t";
+ case 7:
+ return "fldpi";
+ case 8:
+ case 9:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the CONST_DOUBLE representing the 80387 constant that is
+ loaded by the specified special instruction. The argument IDX
+ matches the return value from standard_80387_constant_p. */
+
+rtx
+standard_80387_constant_rtx (int idx)
+{
+ int i;
+
+ if (! ext_80387_constants_init)
+ init_ext_80387_constants ();
+
+ switch (idx)
+ {
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ i = idx - 3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
+ XFmode);
+}
+
+/* Return 1 if MODE is a valid mode for SSE. */
+static int
+standard_sse_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/* Return 1 if X is all 0s. For all 1s, return 2 if X is in a 128-bit
+ SSE mode and SSE2 is enabled, or 3 if X is in a 256-bit AVX mode
+ and AVX is enabled. */
+
+int
+standard_sse_constant_p (rtx x)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ return 1;
+ if (vector_all_ones_operand (x, mode))
+ {
+ if (standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -2;
+ else if (VALID_AVX256_REG_MODE (mode))
+ return TARGET_AVX ? 3 : -3;
+ }
+
+ return 0;
+}
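+
+/* E.g. CONST0_RTX (V4SFmode) yields 1 (loadable with xorps), and an
+ all-ones V4SImode vector yields 2 when SSE2 is enabled (loadable
+ with pcmpeqd). */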
+
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ case MODE_V2DF:
+ return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ case MODE_TI:
+ return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+ case MODE_V8SF:
+ return "vxorps\t%x0, %x0, %x0";
+ case MODE_V4DF:
+ return "vxorpd\t%x0, %x0, %x0";
+ case MODE_OI:
+ return "vpxor\t%x0, %x0, %x0";
+ default:
+ gcc_unreachable ();
+ }
+ case 2:
+ if (TARGET_AVX)
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ case MODE_V2DF:
+ case MODE_TI:
+ return "vpcmpeqd\t%0, %0, %0";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else
+ return "pcmpeqd\t%0, %0";
+ }
+ gcc_unreachable ();
+}
+
+/* Returns 1 if OP contains a symbol reference. */
+
+int
+symbolic_reference_mentioned_p (rtx op)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
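+
+/* Worked example (an annotation, not part of the original code): for
+ OP = (plus:SI (reg:SI 0) (symbol_ref:SI ("x"))), GET_RTX_FORMAT (PLUS)
+ is "ee", so the loop above recurses into both operands and finds the
+ SYMBOL_REF in XEXP (op, 1).  An 'E' slot holds a vector of rtxes
+ (e.g. the body of a PARALLEL) and is walked element by element.  */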
+
+/* Return 1 if it is appropriate to emit `ret' instructions in the
+ body of a function. Do this only if the epilogue is simple, needing a
+ couple of insns. Prior to reloading, we can't tell how many registers
+ must be saved, so return 0 then. Return 0 if there is no frame
+ marker to de-allocate. */
+
+int
+ix86_can_use_return_insn_p (void)
+{
+ struct ix86_frame frame;
+
+ if (! reload_completed || frame_pointer_needed)
+ return 0;
+
+ /* Don't allow more than 32k bytes to be popped, since that's all
+ we can do with one instruction.  */
+ if (crtl->args.pops_args
+ && crtl->args.size >= 32768)
+ return 0;
+
+ ix86_compute_frame_layout (&frame);
+ return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
+}
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may
+ be accessed via the stack pointer) in functions that seem suitable. */
+
+int
+ix86_frame_pointer_required (void)
+{
+ /* If we accessed previous frames, then the generated code expects
+ to be able to access the saved ebp value in our frame. */
+ if (cfun->machine->accesses_prev_frame)
+ return 1;
+
+ /* Several x86 OSes need a frame pointer for other reasons,
+ usually pertaining to setjmp.  */
+ if (SUBTARGET_FRAME_POINTER_REQUIRED)
+ return 1;
+
+ /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
+ the frame pointer by default. Turn it back on now if we've not
+ got a leaf function. */
+ if (TARGET_OMIT_LEAF_FRAME_POINTER
+ && (!current_function_is_leaf
+ || ix86_current_function_calls_tls_descriptor))
+ return 1;
+
+ if (crtl->profile)
+ return 1;
+
+ return 0;
+}
+
+/* Record that the current function accesses previous call frames. */
+
+void
+ix86_setup_frame_addresses (void)
+{
+ cfun->machine->accesses_prev_frame = 1;
+}
+
+#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# define USE_HIDDEN_LINKONCE 1
+#else
+# define USE_HIDDEN_LINKONCE 0
+#endif
+
+static int pic_labels_used;
+
+/* Fills in the label name that should be used for a pc thunk for
+ the given register. */
+
+static void
+get_pc_thunk_name (char name[32], unsigned int regno)
+{
+ gcc_assert (!TARGET_64BIT);
+
+ if (USE_HIDDEN_LINKONCE)
+ sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
+ else
+ ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
+}
+
+
+/* This function generates the pc thunks used by -fpic: each thunk
+ loads its register with the return address of the caller and then
+ returns.  */
+
+void
+ix86_file_end (void)
+{
+ rtx xops[2];
+ int regno;
+
+ for (regno = 0; regno < 8; ++regno)
+ {
+ char name[32];
+
+ if (! ((pic_labels_used >> regno) & 1))
+ continue;
+
+ get_pc_thunk_name (name, regno);
+
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ switch_to_section (darwin_sections[text_coal_section]);
+ fputs ("\t.weak_definition\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n\t.private_extern\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputs ("\n", asm_out_file);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+ else
+#endif
+ if (USE_HIDDEN_LINKONCE)
+ {
+ tree decl;
+
+ decl = build_decl (FUNCTION_DECL, get_identifier (name),
+ error_mark_node);
+ TREE_PUBLIC (decl) = 1;
+ TREE_STATIC (decl) = 1;
+ DECL_ONE_ONLY (decl) = 1;
+
+ (*targetm.asm_out.unique_section) (decl, 0);
+ switch_to_section (get_named_section (decl, NULL, 0));
+
+ (*targetm.asm_out.globalize_label) (asm_out_file, name);
+ fputs ("\t.hidden\t", asm_out_file);
+ assemble_name (asm_out_file, name);
+ fputc ('\n', asm_out_file);
+ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
+ }
+ else
+ {
+ switch_to_section (text_section);
+ ASM_OUTPUT_LABEL (asm_out_file, name);
+ }
+
+ xops[0] = gen_rtx_REG (Pmode, regno);
+ xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
+ output_asm_insn ("ret", xops);
+ }
+
+ if (NEED_INDICATE_EXEC_STACK)
+ file_end_indicate_exec_stack ();
+}
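+
+/* For reference, the thunk emitted above for, say, %ebx looks roughly
+ like this (AT&T syntax; section and visibility directives omitted):
+
+ __x86.get_pc_thunk.bx:
+ movl (%esp), %ebx
+ ret
+
+ It copies its own return address, i.e. the address of the insn
+ following the call, into the register, which is exactly the PC value
+ that PIC register setup needs.  */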
+
+/* Emit code for the SET_GOT patterns. */
+
+const char *
+output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
+{
+ rtx xops[3];
+
+ xops[0] = dest;
+
+ if (TARGET_VXWORKS_RTP && flag_pic)
+ {
+ /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
+ xops[2] = gen_rtx_MEM (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
+ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
+
+ /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
+ Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
+ an unadorned address. */
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
+ output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
+ return "";
+ }
+
+ xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
+
+ if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+ {
+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
+
+ if (!flag_pic)
+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
+ else
+ output_asm_insn ("call\t%a2", xops);
+
+#if TARGET_MACHO
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
+#endif
+
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
+
+ if (flag_pic)
+ output_asm_insn ("pop%z0\t%0", xops);
+ }
+ else
+ {
+ char name[32];
+ get_pc_thunk_name (name, REGNO (dest));
+ pic_labels_used |= 1 << REGNO (dest);
+
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
+ xops[2] = gen_rtx_MEM (QImode, xops[2]);
+ output_asm_insn ("call\t%X2", xops);
+ /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
+ is what will be referenced by the Mach-O PIC subsystem. */
+#if TARGET_MACHO
+ if (!label)
+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
+ else
+ targetm.asm_out.internal_label (asm_out_file, "L",
+ CODE_LABEL_NUMBER (label));
+#endif
+ }
+
+ if (TARGET_MACHO)
+ return "";
+
+ if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
+ else
+ output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+
+ return "";
+}
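+
+/* Illustrative output (a sketch, not part of the original code): with
+ deep branch prediction the GOT pointer is set up via the pc thunk,
+
+ call __x86.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+
+ while without it the classic call/pop sequence is emitted:
+
+ call .L2
+ .L2: popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
+
+ matching the "call", "pop%z0" and "add%z0" templates above.  */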
+
+/* Generate a "push" pattern for input ARG.  */
+
+static rtx
+gen_push (rtx arg)
+{
+ if (ix86_cfa_state->reg == stack_pointer_rtx)
+ ix86_cfa_state->offset += UNITS_PER_WORD;
+
+ return gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ arg);
+}
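+
+/* For example (an annotation, not part of the original code), gen_push
+ for %ebp on ia32 produces the rtl
+
+ (set (mem:SI (pre_dec:SI (reg:SI esp)))
+ (reg:SI ebp))
+
+ which matches the push instruction pattern; the CFA bookkeeping above
+ accounts for the implicit stack pointer adjustment.  */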
+
+/* Return the number of an unused call-clobbered register that is
+ available for the entire function, or INVALID_REGNUM if there is
+ none.  */
+
+static unsigned int
+ix86_select_alt_pic_regnum (void)
+{
+ if (current_function_is_leaf && !crtl->profile
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ int i, drap;
+ /* Can't use the same register for both PIC and DRAP. */
+ if (crtl->drap_reg)
+ drap = REGNO (crtl->drap_reg);
+ else
+ drap = -1;
+ for (i = 2; i >= 0; --i)
+ if (i != drap && !df_regs_ever_live_p (i))
+ return i;
+ }
+
+ return INVALID_REGNUM;
+}
+
+/* Return 1 if we need to save REGNO. */
+static int
+ix86_save_reg (unsigned int regno, int maybe_eh_return)
+{
+ if (pic_offset_table_rtx
+ && regno == REAL_PIC_OFFSET_TABLE_REGNUM
+ && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
+ || crtl->profile
+ || crtl->calls_eh_return
+ || crtl->uses_const_pool))
+ {
+ if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
+ return 0;
+ return 1;
+ }
+
+ if (crtl->calls_eh_return && maybe_eh_return)
+ {
+ unsigned i;
+ for (i = 0; ; i++)
+ {
+ unsigned test = EH_RETURN_DATA_REGNO (i);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == regno)
+ return 1;
+ }
+ }
+
+ if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+ return 1;
+
+ return (df_regs_ever_live_p (regno)
+ && !call_used_regs[regno]
+ && !fixed_regs[regno]
+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
+}
+
+/* Return the number of saved general-purpose registers.  */
+
+static int
+ix86_nsaved_regs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ nregs ++;
+ return nregs;
+}
+
+/* Return the number of saved SSE registers.  */
+
+static int
+ix86_nsaved_sseregs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ if (ix86_cfun_abi () != MS_ABI)
+ return 0;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ nregs ++;
+ return nregs;
+}
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. If stack alignment is needed, we can only replace argument
+ pointer with hard frame pointer, or replace frame pointer with stack
+ pointer. Otherwise, frame pointer elimination is automatically
+ handled and all other eliminations are valid. */
+
+int
+ix86_can_eliminate (int from, int to)
+{
+ if (stack_realign_fp)
+ return ((from == ARG_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ || (from == FRAME_POINTER_REGNUM
+ && to == STACK_POINTER_REGNUM));
+ else
+ return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
+}
+
+/* Return the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+HOST_WIDE_INT
+ix86_initial_elimination_offset (int from, int to)
+{
+ struct ix86_frame frame;
+ ix86_compute_frame_layout (&frame);
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset;
+ else if (from == FRAME_POINTER_REGNUM
+ && to == HARD_FRAME_POINTER_REGNUM)
+ return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
+ else
+ {
+ gcc_assert (to == STACK_POINTER_REGNUM);
+
+ if (from == ARG_POINTER_REGNUM)
+ return frame.stack_pointer_offset;
+
+ gcc_assert (from == FRAME_POINTER_REGNUM);
+ return frame.stack_pointer_offset - frame.frame_pointer_offset;
+ }
+}
+
+/* In a dynamically-aligned function, we can't know the offset from
+ stack pointer to frame pointer, so we must ensure that setjmp
+ eliminates fp against the hard fp (%ebp) rather than trying to
+ index from %esp up to the top of the frame across a gap that is
+ of unknown (at compile-time) size. */
+static rtx
+ix86_builtin_setjmp_frame_value (void)
+{
+ return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
+}
+
+/* Fill structure ix86_frame about frame of currently computed function. */
+
+static void
+ix86_compute_frame_layout (struct ix86_frame *frame)
+{
+ HOST_WIDE_INT total_size;
+ unsigned int stack_alignment_needed;
+ HOST_WIDE_INT offset;
+ unsigned int preferred_alignment;
+ HOST_WIDE_INT size = get_frame_size ();
+
+ frame->nregs = ix86_nsaved_regs ();
+ frame->nsseregs = ix86_nsaved_sseregs ();
+ total_size = size;
+
+ stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+
+ /* The MS ABI seems to require stack alignment to always be 16, except
+ within function prologues.  */
+ if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ {
+ preferred_alignment = 16;
+ stack_alignment_needed = 16;
+ crtl->preferred_stack_boundary = 128;
+ crtl->stack_alignment_needed = 128;
+ }
+
+ gcc_assert (!size || stack_alignment_needed);
+ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
+ gcc_assert (preferred_alignment <= stack_alignment_needed);
+
+ /* During reload iteration the number of registers saved can change.
+ Recompute the value as needed.  Do not recompute when the number of
+ registers didn't change, as reload does multiple calls to the
+ function and does not expect the decision to change within a single
+ iteration.  */
+ if (!optimize_function_for_size_p (cfun)
+ && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
+ {
+ int count = frame->nregs;
+
+ cfun->machine->use_fast_prologue_epilogue_nregs = count;
+ /* The fast prologue uses move instead of push to save registers.  This
+ is significantly longer, but also executes faster, as modern hardware
+ can execute the moves in parallel but can't do that for push/pop.
+
+ Be careful about choosing which prologue to emit: when the function
+ takes many instructions to execute, we may use the slow version, as
+ well as when the function is known to be outside a hot spot (this is
+ known with feedback only).  Weight the size of the function by the
+ number of registers to save, as it is cheap to use one or two push
+ instructions but very slow to use many of them.  */
+ if (count)
+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
+ if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ || (flag_branch_probabilities
+ && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ cfun->machine->use_fast_prologue_epilogue = false;
+ else
+ cfun->machine->use_fast_prologue_epilogue
+ = !expensive_function_p (count);
+ }
+ if (TARGET_PROLOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue)
+ frame->save_regs_using_mov = true;
+ else
+ frame->save_regs_using_mov = false;
+
+
+ /* Skip return address and saved base pointer. */
+ offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
+
+ frame->hard_frame_pointer_offset = offset;
+
+ /* Set offset to aligned because the realigned frame starts from
+ here. */
+ if (stack_realign_fp)
+ offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
+
+ /* Register save area */
+ offset += frame->nregs * UNITS_PER_WORD;
+
+ /* Align SSE reg save area. */
+ if (frame->nsseregs)
+ frame->padding0 = ((offset + 16 - 1) & -16) - offset;
+ else
+ frame->padding0 = 0;
+
+ /* SSE register save area. */
+ offset += frame->padding0 + frame->nsseregs * 16;
+
+ /* Va-arg area */
+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
+ offset += frame->va_arg_size;
+
+ /* Align start of frame for local function. */
+ frame->padding1 = ((offset + stack_alignment_needed - 1)
+ & -stack_alignment_needed) - offset;
+
+ offset += frame->padding1;
+
+ /* Frame pointer points here. */
+ frame->frame_pointer_offset = offset;
+
+ offset += size;
+
+ /* Add the outgoing arguments area.  It can be skipped if we eliminated
+ all the function calls as dead code.
+ Skipping is however impossible when the function calls alloca, as the
+ alloca expander assumes that the last crtl->outgoing_args_size bytes
+ of the stack frame are unused.  */
+ if (ACCUMULATE_OUTGOING_ARGS
+ && (!current_function_is_leaf || cfun->calls_alloca
+ || ix86_current_function_calls_tls_descriptor))
+ {
+ offset += crtl->outgoing_args_size;
+ frame->outgoing_arguments_size = crtl->outgoing_args_size;
+ }
+ else
+ frame->outgoing_arguments_size = 0;
+
+ /* Align stack boundary. Only needed if we're calling another function
+ or using alloca. */
+ if (!current_function_is_leaf || cfun->calls_alloca
+ || ix86_current_function_calls_tls_descriptor)
+ frame->padding2 = ((offset + preferred_alignment - 1)
+ & -preferred_alignment) - offset;
+ else
+ frame->padding2 = 0;
+
+ offset += frame->padding2;
+
+ /* We've reached end of stack frame. */
+ frame->stack_pointer_offset = offset;
+
+ /* Size prologue needs to allocate. */
+ frame->to_allocate =
+ (size + frame->padding1 + frame->padding2
+ + frame->outgoing_arguments_size + frame->va_arg_size);
+
+ if ((!frame->to_allocate && frame->nregs <= 1)
+ || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
+ frame->save_regs_using_mov = false;
+
+ if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
+ && current_function_is_leaf
+ && !ix86_current_function_calls_tls_descriptor)
+ {
+ frame->red_zone_size = frame->to_allocate;
+ if (frame->save_regs_using_mov)
+ frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
+ }
+ else
+ frame->red_zone_size = 0;
+ frame->to_allocate -= frame->red_zone_size;
+ frame->stack_pointer_offset -= frame->red_zone_size;
+#if 0
+ fprintf (stderr, "\n");
+ fprintf (stderr, "size: %ld\n", (long)size);
+ fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
+ fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
+ fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
+ fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
+ fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
+ fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
+ fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
+ fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
+ fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
+ fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
+ fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
+ (long)frame->hard_frame_pointer_offset);
+ fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
+ fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
+ fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
+ fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
+#endif
+}
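+
+/* A sketch of the frame layout computed above (an annotation, not part
+ of the original code; fields that are zero for a given function
+ collapse away):
+
+ return address <- entry %esp
+ saved %ebp (if frame pointer) <- hard_frame_pointer_offset
+ [realignment padding]
+ saved general registers
+ padding0
+ saved SSE registers (MS ABI)
+ va_arg register save area
+ padding1 <- frame_pointer_offset
+ local variables
+ outgoing arguments
+ padding2 <- stack_pointer_offset
+
+ The rounding idiom used throughout,
+ offset = (offset + align - 1) & -align;
+ rounds OFFSET up to a multiple of ALIGN, e.g. 13 becomes 16 for
+ align == 16.  */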
+
+/* Emit code to save registers in the prologue. */
+
+static void
+ix86_emit_save_regs (void)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit code to save registers using MOV insns.  The first register
+ is stored at POINTER + OFFSET.  */
+static void
+ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
+ Pmode, offset),
+ gen_rtx_REG (Pmode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += UNITS_PER_WORD;
+ }
+}
+
+/* Emit code to save SSE registers using MOV insns.  The first register
+ is stored at POINTER + OFFSET.  */
+static void
+ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+ rtx mem;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
+ set_mem_align (mem, 128);
+ insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += 16;
+ }
+}
+
+static GTY(()) rtx queued_cfa_restores;
+
+/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
+ stack manipulation insn.  Don't add the note if the previously saved
+ value will be left untouched within the stack red zone until return,
+ as unwinders can find the same value in the register and on the
+ stack.  */
+
+static void
+ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
+{
+ if (TARGET_RED_ZONE
+ && !TARGET_64BIT_MS_ABI
+ && red_offset + RED_ZONE_SIZE >= 0
+ && crtl->args.pops_args < 65536)
+ return;
+
+ if (insn)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ queued_cfa_restores
+ = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
+}
+
+/* Add queued REG_CFA_RESTORE notes if any to INSN. */
+
+static void
+ix86_add_queued_cfa_restore_notes (rtx insn)
+{
+ rtx last;
+ if (!queued_cfa_restores)
+ return;
+ for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
+ ;
+ XEXP (last, 1) = REG_NOTES (insn);
+ REG_NOTES (insn) = queued_cfa_restores;
+ queued_cfa_restores = NULL_RTX;
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* Expand prologue or epilogue stack adjustment.
+ The pattern exists to put a dependency on all ebp-based memory accesses.
+ STYLE should be negative if instructions should be marked as frame related,
+ zero if %r11 register is live and cannot be freely used and positive
+ otherwise. */
+
+static void
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
+ int style, bool set_cfa)
+{
+ rtx insn;
+
+ if (! TARGET_64BIT)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
+ else if (x86_64_immediate_operand (offset, DImode))
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
+ else
+ {
+ rtx r11;
+ /* r11 is used by the indirect sibcall return as well: it is set
+ before the epilogue and used after the epilogue.  At the moment an
+ indirect sibcall shouldn't be used together with huge frame sizes
+ in one function because of the frame_size check in sibcall.c.  */
+ gcc_assert (style);
+ r11 = gen_rtx_REG (DImode, R11_REG);
+ insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
+ if (style < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
+ offset));
+ }
+
+ if (style >= 0)
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ if (set_cfa)
+ {
+ rtx r;
+
+ gcc_assert (ix86_cfa_state->reg == src);
+ ix86_cfa_state->offset += INTVAL (offset);
+ ix86_cfa_state->reg = dest;
+
+ r = gen_rtx_PLUS (Pmode, src, offset);
+ r = gen_rtx_SET (VOIDmode, dest, r);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else if (style < 0)
+ RTX_FRAME_RELATED_P (insn) = 1;
+}
+
+/* Find an available register to be used as the dynamic realign
+ argument pointer register.  Such a register will be written in the
+ prologue and used at the beginning of the body, so it must not be
+ 1. a parameter passing register.
+ 2. the GOT pointer.
+ We reuse the static-chain register if it is available.  Otherwise,
+ we use DI for i386 and R13 for x86-64.  We chose R13 since it has
+ a shorter encoding.
+
+ Return: the regno of the chosen register.  */
+
+static unsigned int
+find_drap_reg (void)
+{
+ tree decl = cfun->decl;
+
+ if (TARGET_64BIT)
+ {
+ /* Use R13 for a nested function or a function that needs a static
+ chain.  Since a function with a tail call may use any caller-saved
+ register in the epilogue, DRAP must not use a caller-saved
+ register in that case.  */
+ if ((decl_function_context (decl)
+ && !DECL_NO_STATIC_CHAIN (decl))
+ || crtl->tail_call_emit)
+ return R13_REG;
+
+ return R10_REG;
+ }
+ else
+ {
+ /* Use DI for a nested function or a function that needs a static
+ chain.  Since a function with a tail call may use any caller-saved
+ register in the epilogue, DRAP must not use a caller-saved
+ register in that case.  */
+ if ((decl_function_context (decl)
+ && !DECL_NO_STATIC_CHAIN (decl))
+ || crtl->tail_call_emit)
+ return DI_REG;
+
+ /* Reuse static chain register if it isn't used for parameter
+ passing. */
+ if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
+ && !lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (decl))))
+ return CX_REG;
+ else
+ return DI_REG;
+ }
+}
+
+/* Update incoming stack boundary and estimated stack alignment. */
+
+static void
+ix86_update_stack_boundary (void)
+{
+ /* Prefer the one specified at command line. */
+ ix86_incoming_stack_boundary
+ = (ix86_user_incoming_stack_boundary
+ ? ix86_user_incoming_stack_boundary
+ : ix86_default_incoming_stack_boundary);
+
+ /* Incoming stack alignment can be changed on individual functions
+ via force_align_arg_pointer attribute. We use the smallest
+ incoming stack boundary. */
+ if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
+ ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
+
+ /* The incoming stack frame has to be aligned at least at
+ parm_stack_boundary. */
+ if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
+ ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
+
+ /* The stack at the entry of main is aligned by the runtime.  We use
+ the smallest incoming stack boundary.  */
+ if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
+ && DECL_NAME (current_function_decl)
+ && MAIN_NAME_P (DECL_NAME (current_function_decl))
+ && DECL_FILE_SCOPE_P (current_function_decl))
+ ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
+
+ /* An x86-64 varargs function needs 16-byte stack alignment for the
+ register save area.  */
+ if (TARGET_64BIT
+ && cfun->stdarg
+ && crtl->stack_alignment_estimated < 128)
+ crtl->stack_alignment_estimated = 128;
+}
+
+/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
+ needed or an rtx for DRAP otherwise. */
+
+static rtx
+ix86_get_drap_rtx (void)
+{
+ if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
+ crtl->need_drap = true;
+
+ if (stack_realign_drap)
+ {
+ /* Assign DRAP to vDRAP and return vDRAP.  */
+ unsigned int regno = find_drap_reg ();
+ rtx drap_vreg;
+ rtx arg_ptr;
+ rtx seq, insn;
+
+ arg_ptr = gen_rtx_REG (Pmode, regno);
+ crtl->drap_reg = arg_ptr;
+
+ start_sequence ();
+ drap_vreg = copy_to_reg (arg_ptr);
+ seq = get_insns ();
+ end_sequence ();
+
+ insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ return drap_vreg;
+ }
+ else
+ return NULL;
+}
+
+/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
+
+static rtx
+ix86_internal_arg_pointer (void)
+{
+ return virtual_incoming_args_rtx;
+}
+
+/* Finalize the stack_realign_needed flag, which guides the prologue
+ and epilogue so that they are generated in the correct form.  */
+static void
+ix86_finalize_stack_realign_flags (void)
+{
+ /* Check whether stack realignment is really needed after reload,
+ and store the result in crtl.  */
+ unsigned int incoming_stack_boundary
+ = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
+ ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
+ unsigned int stack_realign = (incoming_stack_boundary
+ < (current_function_is_leaf
+ ? crtl->max_used_stack_slot_alignment
+ : crtl->stack_alignment_needed));
+
+ if (crtl->stack_realign_finalized)
+ {
+ /* After stack_realign_needed is finalized, we can no longer
+ change it.  */
+ gcc_assert (crtl->stack_realign_needed == stack_realign);
+ }
+ else
+ {
+ crtl->stack_realign_needed = stack_realign;
+ crtl->stack_realign_finalized = true;
+ }
+}
+
+/* Expand the prologue into a bunch of separate insns. */
+
+void
+ix86_expand_prologue (void)
+{
+ rtx insn;
+ bool pic_reg_used;
+ struct ix86_frame frame;
+ HOST_WIDE_INT allocate;
+
+ ix86_finalize_stack_realign_flags ();
+
+ /* DRAP should not coexist with stack_realign_fp.  */
+ gcc_assert (!(crtl->drap_reg && stack_realign_fp));
+
+ /* Initialize CFA state for before the prologue. */
+ ix86_cfa_state->reg = stack_pointer_rtx;
+ ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
+
+ ix86_compute_frame_layout (&frame);
+
+ /* Emit prologue code to adjust stack alignment and set up DRAP, in
+ case DRAP is needed and stack realignment is really needed after
+ reload.  */
+ if (crtl->drap_reg && crtl->stack_realign_needed)
+ {
+ rtx x, y;
+ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
+ ? 0 : UNITS_PER_WORD);
+
+ gcc_assert (stack_realign_drap);
+
+ /* Grab the argument pointer. */
+ x = plus_constant (stack_pointer_rtx,
+ (UNITS_PER_WORD + param_ptr_offset));
+ y = crtl->drap_reg;
+
+ /* We only need to push the parameter pointer reg if it is a
+ call-saved reg.  */
+ if (!call_used_regs[REGNO (crtl->drap_reg)])
+ {
+ /* Push arg pointer reg */
+ insn = emit_insn (gen_push (y));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ ix86_cfa_state->reg = crtl->drap_reg;
+
+ /* Align the stack. */
+ insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* Replicate the return address on the stack so that the return
+ address can be reached via the (argp - 1) slot.  This is needed
+ to implement the RETURN_ADDR_RTX macro and the intrinsic function
+ expand_builtin_return_addr, etc.  */
+ x = crtl->drap_reg;
+ x = gen_frame_mem (Pmode,
+ plus_constant (x, -UNITS_PER_WORD));
+ insn = emit_insn (gen_push (x));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* Note: AT&T enter does NOT have reversed args. Enter is probably
+ slower on all targets. Also sdb doesn't like it. */
+
+ if (frame_pointer_needed)
+ {
+ insn = emit_insn (gen_push (hard_frame_pointer_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (ix86_cfa_state->reg == stack_pointer_rtx)
+ ix86_cfa_state->reg = hard_frame_pointer_rtx;
+ }
+
+ if (stack_realign_fp)
+ {
+ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
+ gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
+
+ /* Align the stack. */
+ insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (-align_bytes)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
+
+ if (!frame.save_regs_using_mov)
+ ix86_emit_save_regs ();
+ else
+ allocate += frame.nregs * UNITS_PER_WORD;
+
+ /* When using the red zone we may start saving registers before
+ allocating the stack frame, saving one cycle of the prologue.
+ However, we avoid doing this if we are going to have to probe the
+ stack, since at least on x86_64 the stack probe can turn into a call
+ that clobbers a red zone location.  */
+ if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
+ && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
+ ix86_emit_save_regs_using_mov ((frame_pointer_needed
+ && !crtl->stack_realign_needed)
+ ? hard_frame_pointer_rtx
+ : stack_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+
+ if (allocate == 0)
+ ;
+ else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate), -1,
+ ix86_cfa_state->reg == stack_pointer_rtx);
+ else
+ {
+ rtx eax = gen_rtx_REG (Pmode, AX_REG);
+ bool eax_live;
+ rtx t;
+
+ if (cfun->machine->call_abi == MS_ABI)
+ eax_live = false;
+ else
+ eax_live = ix86_eax_live_at_start_p ();
+
+ if (eax_live)
+ {
+ emit_insn (gen_push (eax));
+ allocate -= UNITS_PER_WORD;
+ }
+
+ emit_move_insn (eax, GEN_INT (allocate));
+
+ if (TARGET_64BIT)
+ insn = gen_allocate_stack_worker_64 (eax, eax);
+ else
+ insn = gen_allocate_stack_worker_32 (eax, eax);
+ insn = emit_insn (insn);
+
+ if (ix86_cfa_state->reg == stack_pointer_rtx)
+ {
+ ix86_cfa_state->offset += allocate;
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ if (eax_live)
+ {
+ if (frame_pointer_needed)
+ t = plus_constant (hard_frame_pointer_rtx,
+ allocate
+ - frame.to_allocate
+ - frame.nregs * UNITS_PER_WORD);
+ else
+ t = plus_constant (stack_pointer_rtx, allocate);
+ emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
+ }
+ }
+
+ if (frame.save_regs_using_mov
+ && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
+ && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
+ {
+ if (!frame_pointer_needed
+ || !frame.to_allocate
+ || crtl->stack_realign_needed)
+ ix86_emit_save_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate
+ + frame.nsseregs * 16 + frame.padding0);
+ else
+ ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+ }
+ if (!frame_pointer_needed
+ || !frame.to_allocate
+ || crtl->stack_realign_needed)
+ ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate);
+ else
+ ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
+ - frame.nregs * UNITS_PER_WORD
+ - frame.nsseregs * 16
+ - frame.padding0);
+
+ pic_reg_used = false;
+ if (pic_offset_table_rtx
+ && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
+ || crtl->profile))
+ {
+ unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
+
+ if (alt_pic_reg_used != INVALID_REGNUM)
+ SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
+
+ pic_reg_used = true;
+ }
+
+ if (pic_reg_used)
+ {
+ if (TARGET_64BIT)
+ {
+ if (ix86_cmodel == CM_LARGE_PIC)
+ {
+ rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
+ rtx label = gen_label_rtx ();
+ emit_label (label);
+ LABEL_PRESERVE_P (label) = 1;
+ gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
+ insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
+ insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
+ insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
+ pic_offset_table_rtx, tmp_reg));
+ }
+ else
+ insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
+ }
+ else
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ }
+
+ /* In the pic_reg_used case, make sure that the got load isn't deleted
+ when mcount needs it. Blockage to avoid call movement across mcount
+ call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
+ note. */
+ if (crtl->profile && pic_reg_used)
+ emit_insn (gen_prologue_use (pic_offset_table_rtx));
+
+ if (crtl->drap_reg && !crtl->stack_realign_needed)
+ {
+ /* vDRAP was set up, but after reload it turns out that stack realign
+ isn't necessary; emit prologue code to set up DRAP without the
+ stack realign adjustment.  */
+ int drap_bp_offset = UNITS_PER_WORD * 2;
+ rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
+ insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
+ }
+
+ /* Prevent instructions from being scheduled into the register save push
+ sequence when access to the red zone area is done through the frame
+ pointer.  The offset between the frame pointer and the stack pointer
+ is calculated relative to the value of the stack pointer at the end
+ of the function prologue, and moving instructions that access the
+ red zone area via the frame pointer inside the push sequence violates
+ this assumption.  */
+ if (frame_pointer_needed && frame.red_zone_size)
+ emit_insn (gen_memory_blockage ());
+
+ /* Emit cld instruction if stringops are used in the function. */
+ if (TARGET_CLD && ix86_current_function_needs_cld)
+ emit_insn (gen_cld ());
+}
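+
+/* Putting the pieces together (a sketch, not part of the original
+ code): a typical -fpic ia32 prologue produced by the code above is
+
+ pushl %ebp
+ movl %esp, %ebp
+ subl $N, %esp
+ movl %ebx, -4(%ebp)
+ call __x86.get_pc_thunk.bx
+ addl $_GLOBAL_OFFSET_TABLE_, %ebx
+
+ where N is frame.to_allocate and the register save uses either moves
+ or pushes depending on save_regs_using_mov; the exact form depends on
+ the frame.  */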
+
+/* Emit code to restore REG using a POP insn. */
+
+static void
+ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
+{
+ rtx insn = emit_insn (ix86_gen_pop1 (reg));
+
+ if (ix86_cfa_state->reg == crtl->drap_reg
+ && REGNO (reg) == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ return;
+ }
+
+ if (ix86_cfa_state->reg == stack_pointer_rtx)
+ {
+ ix86_cfa_state->offset -= UNITS_PER_WORD;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ /* When the frame pointer is the CFA, and we pop it, we are
+ swapping back to the stack pointer as the CFA. This happens
+ for stack frames that don't allocate other data, so we assume
+ the stack pointer is now pointing at the return address, i.e.
+ the function entry state, which makes the offset be 1 word. */
+ else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
+ && reg == hard_frame_pointer_rtx)
+ {
+ ix86_cfa_state->reg = stack_pointer_rtx;
+ ix86_cfa_state->offset = UNITS_PER_WORD;
+
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (UNITS_PER_WORD)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ ix86_add_cfa_restore_note (insn, reg, red_offset);
+}
+
+/* Emit code to restore saved registers using POP insns. */
+
+static void
+ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
+{
+ int regno;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
+ {
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
+ red_offset);
+ red_offset += UNITS_PER_WORD;
+ }
+}
+
+/* Emit code and notes for the LEAVE instruction. */
+
+static void
+ix86_emit_leave (HOST_WIDE_INT red_offset)
+{
+ rtx insn = emit_insn (ix86_gen_leave ());
+
+ ix86_add_queued_cfa_restore_notes (insn);
+
+ if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
+ {
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
+ }
+}
+
+/* Emit code to restore saved registers using MOV insns. First register
+ is restored from POINTER + OFFSET. */
+static void
+ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+ HOST_WIDE_INT red_offset,
+ int maybe_eh_return)
+{
+ unsigned int regno;
+ rtx base_address = gen_rtx_MEM (Pmode, pointer);
+ rtx insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+ {
+ rtx reg = gen_rtx_REG (Pmode, regno);
+
+ /* Ensure that adjust_address won't be forced to produce a pointer
+ outside the range allowed by the x86-64 instruction set.  */
+ if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
+ {
+ rtx r11;
+
+ r11 = gen_rtx_REG (DImode, R11_REG);
+ emit_move_insn (r11, GEN_INT (offset));
+ emit_insn (gen_adddi3 (r11, r11, pointer));
+ base_address = gen_rtx_MEM (Pmode, r11);
+ offset = 0;
+ }
+ insn = emit_move_insn (reg,
+ adjust_address (base_address, Pmode, offset));
+ offset += UNITS_PER_WORD;
+
+ if (ix86_cfa_state->reg == crtl->drap_reg
+ && regno == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8).  We've just restored that value from
+ the stack, which means we need to reset the CFA to
+ the drap register.  This will remain until we restore
+ the stack pointer.  */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ else
+ ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
+
+ red_offset += UNITS_PER_WORD;
+ }
+}
+
+/* Emit code to restore saved SSE registers using MOV insns.  The first
+ register is restored from POINTER + OFFSET.  */
+static void
+ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+ HOST_WIDE_INT red_offset,
+ int maybe_eh_return)
+{
+ int regno;
+ rtx base_address = gen_rtx_MEM (TImode, pointer);
+ rtx mem, insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+ {
+ rtx reg = gen_rtx_REG (TImode, regno);
+
+ /* Ensure that adjust_address won't be forced to produce a pointer
+ outside the range allowed by the x86-64 instruction set.  */
+ if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
+ {
+ rtx r11;
+
+ r11 = gen_rtx_REG (DImode, R11_REG);
+ emit_move_insn (r11, GEN_INT (offset));
+ emit_insn (gen_adddi3 (r11, r11, pointer));
+ base_address = gen_rtx_MEM (TImode, r11);
+ offset = 0;
+ }
+ mem = adjust_address (base_address, TImode, offset);
+ set_mem_align (mem, 128);
+ insn = emit_move_insn (reg, mem);
+ offset += 16;
+
+ ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
+
+ red_offset += 16;
+ }
+}
+
+/* Restore function stack, frame, and registers. */
+
+void
+ix86_expand_epilogue (int style)
+{
+ int sp_valid;
+ struct ix86_frame frame;
+ HOST_WIDE_INT offset, red_offset;
+ struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
+ bool using_drap;
+
+ ix86_finalize_stack_realign_flags ();
+
+ /* When stack is realigned, SP must be valid. */
+ sp_valid = (!frame_pointer_needed
+ || current_function_sp_is_unchanging
+ || stack_realign_fp);
+
+ ix86_compute_frame_layout (&frame);
+
+ /* See the comment about red zone and frame
+ pointer usage in ix86_expand_prologue. */
+ if (frame_pointer_needed && frame.red_zone_size)
+ emit_insn (gen_memory_blockage ());
+
+ using_drap = crtl->drap_reg && crtl->stack_realign_needed;
+ gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
+
+ /* Calculate start of saved registers relative to ebp. Special care
+ must be taken for the normal return case of a function using
+ eh_return: the eax and edx registers are marked as saved, but not
+ restored along this path. */
+ offset = frame.nregs;
+ if (crtl->calls_eh_return && style != 2)
+ offset -= 2;
+ offset *= -UNITS_PER_WORD;
+ offset -= frame.nsseregs * 16 + frame.padding0;
+
+ /* Calculate start of saved registers relative to esp on entry of the
+ function. When realigning stack, this needs to be the most negative
+ value possible at runtime. */
+ red_offset = offset;
+ if (using_drap)
+ red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
+ + UNITS_PER_WORD;
+ else if (stack_realign_fp)
+ red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
+ - UNITS_PER_WORD;
+ if (frame_pointer_needed)
+ red_offset -= UNITS_PER_WORD;
+
+ /* If we're only restoring one register and sp is not valid, then
+ use a move instruction to restore the register, since it's
+ less work than reloading sp and popping the register.
+
+ The default code results in a stack adjustment using an add/lea
+ instruction, while this code results in a LEAVE instruction (or
+ discrete equivalent), so it is profitable in some other cases as
+ well.  Especially when there are no registers to restore.  We also
+ use this code when TARGET_USE_LEAVE and there is exactly one
+ register to pop.  This heuristic may need some tuning in the
+ future.  */
+ if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
+ || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
+ && frame.to_allocate)
+ || (frame_pointer_needed && TARGET_USE_LEAVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs + frame.nsseregs) == 1)
+ || crtl->calls_eh_return)
+ {
+ /* Restore registers.  We can use ebp or esp to address the memory
+ locations.  If both are available, default to ebp, since offsets
+ are known to be small.  The only exception is esp pointing directly
+ to the end of the block of saved registers, where we may simplify
+ the addressing mode.
+
+ If we are realigning the stack with both bp and sp, the register
+ restores can't be addressed by bp; sp must be used instead.  */
+
+ if (!frame_pointer_needed
+ || (sp_valid && !frame.to_allocate)
+ || stack_realign_fp)
+ {
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, red_offset,
+ style == 2);
+ ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate
+ + frame.nsseregs * 16
+ + frame.padding0,
+ red_offset
+ + frame.nsseregs * 16
+ + frame.padding0, style == 2);
+ }
+ else
+ {
+ ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
+ offset, red_offset,
+ style == 2);
+ ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
+ offset
+ + frame.nsseregs * 16
+ + frame.padding0,
+ red_offset
+ + frame.nsseregs * 16
+ + frame.padding0, style == 2);
+ }
+
+ red_offset -= offset;
+
+ /* eh_return epilogues need %ecx added to the stack pointer. */
+ if (style == 2)
+ {
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
+
+ /* Stack align doesn't work with eh_return. */
+ gcc_assert (!crtl->stack_realign_needed);
+
+ if (frame_pointer_needed)
+ {
+ tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
+ tmp = plus_constant (tmp, UNITS_PER_WORD);
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
+
+ tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
+ tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
+
+ /* Note that we use SA as a temporary CFA, as the return
+ address is at the proper place relative to it. We
+ pretend this happens at the FP restore insn because
+ prior to this insn the FP would be stored at the wrong
+ offset relative to SA, and after this insn we have no
+ other reasonable register to use for the CFA. We don't
+ bother resetting the CFA to the SP for the duration of
+ the return insn. */
+ add_reg_note (tmp, REG_CFA_DEF_CFA,
+ plus_constant (sa, UNITS_PER_WORD));
+ ix86_add_queued_cfa_restore_notes (tmp);
+ add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ ix86_cfa_state->reg = sa;
+ ix86_cfa_state->offset = UNITS_PER_WORD;
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
+ const0_rtx, style, false);
+ }
+ else
+ {
+ tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
+ tmp = plus_constant (tmp, (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0));
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+ ix86_add_queued_cfa_restore_notes (tmp);
+
+ gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
+ if (ix86_cfa_state->offset != UNITS_PER_WORD)
+ {
+ ix86_cfa_state->offset = UNITS_PER_WORD;
+ add_reg_note (tmp, REG_CFA_DEF_CFA,
+ plus_constant (stack_pointer_rtx,
+ UNITS_PER_WORD));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ }
+ }
+ }
+ else if (!frame_pointer_needed)
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0),
+ style, !using_drap);
+ /* If not an i386, mov & pop is faster than "leave". */
+ else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
+ || !cfun->machine->use_fast_prologue_epilogue)
+ ix86_emit_leave (red_offset);
+ else
+ {
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ const0_rtx, style, !using_drap);
+
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
+ }
+ }
+ else
+ {
+ /* The first step is to deallocate the stack frame so that we can
+ pop the registers.
+
+ If we realign the stack with the frame pointer, then the stack
+ pointer can't be recovered via lea $offset(%bp), %sp, because
+ there is a padding area between bp and sp for the realignment;
+ "add $to_allocate, %sp" must be used instead.  */
+ if (!sp_valid)
+ {
+ gcc_assert (frame_pointer_needed);
+ gcc_assert (!stack_realign_fp);
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ GEN_INT (offset), style, false);
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, red_offset,
+ style == 2);
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.nsseregs * 16),
+ style, false);
+ }
+ else if (frame.to_allocate || frame.nsseregs)
+ {
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, red_offset,
+ style == 2);
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nsseregs * 16
+ + frame.padding0), style,
+ !using_drap && !frame_pointer_needed);
+ }
+
+ ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
+ + frame.padding0);
+ red_offset -= offset;
+
+ if (frame_pointer_needed)
+ {
+ /* Leave results in shorter dependency chains on CPUs that are
+ able to grok it fast. */
+ if (TARGET_USE_LEAVE)
+ ix86_emit_leave (red_offset);
+ else
+ {
+ /* If stack realignment really happened, we must recover the
+ stack pointer from the hard frame pointer when not using
+ leave.  */
+ if (stack_realign_fp)
+ pro_epilogue_adjust_stack (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ const0_rtx, style, !using_drap);
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
+ red_offset);
+ }
+ }
+ }
+
+ if (using_drap)
+ {
+ int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
+ ? 0 : UNITS_PER_WORD);
+ rtx insn;
+
+ gcc_assert (stack_realign_drap);
+
+ insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
+ crtl->drap_reg,
+ GEN_INT (-(UNITS_PER_WORD
+ + param_ptr_offset))));
+
+ ix86_cfa_state->reg = stack_pointer_rtx;
+ ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
+
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
+ GEN_INT (ix86_cfa_state->offset)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ if (param_ptr_offset)
+ ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
+ }
+
+ /* Sibcall epilogues don't want a return instruction. */
+ if (style == 0)
+ {
+ *ix86_cfa_state = cfa_state_save;
+ return;
+ }
+
+ if (crtl->args.pops_args && crtl->args.size)
+ {
+ rtx popc = GEN_INT (crtl->args.pops_args);
+
+ /* i386 can only pop 64K bytes. If asked to pop more, pop return
+ address, do explicit add, and jump indirectly to the caller. */
+
+ if (crtl->args.pops_args >= 65536)
+ {
+ rtx ecx = gen_rtx_REG (SImode, CX_REG);
+ rtx insn;
+
+ /* There is no "pascal" calling convention in any 64bit ABI. */
+ gcc_assert (!TARGET_64BIT);
+
+ insn = emit_insn (gen_popsi1 (ecx));
+ ix86_cfa_state->offset -= UNITS_PER_WORD;
+
+ add_reg_note (insn, REG_CFA_ADJUST_CFA,
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
+ add_reg_note (insn, REG_CFA_REGISTER,
+ gen_rtx_SET (VOIDmode, ecx, pc_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ popc, -1, true);
+ emit_jump_insn (gen_return_indirect_internal (ecx));
+ }
+ else
+ emit_jump_insn (gen_return_pop_internal (popc));
+ }
+ else
+ emit_jump_insn (gen_return_internal ());
+
+ /* Restore the state back to the state from the prologue,
+ so that it's correct for the next epilogue. */
+ *ix86_cfa_state = cfa_state_save;
+}
+
+/* Reset from the function's potential modifications. */
+
+static void
+ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+ if (pic_offset_table_rtx)
+ SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
+#if TARGET_MACHO
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
+}
+
+/* Extract the parts of an RTL expression that is a valid memory address
+ for an instruction.  Return 0 if the structure of the address is
+ grossly off.  Return -1 if the address contains ASHIFT, so it is not
+ strictly valid, but is still used for computing the length of the lea
+ instruction.  */
+
+int
+ix86_decompose_address (rtx addr, struct ix86_address *out)
+{
+ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
+ rtx base_reg, index_reg;
+ HOST_WIDE_INT scale = 1;
+ rtx scale_rtx = NULL_RTX;
+ int retval = 1;
+ enum ix86_address_seg seg = SEG_DEFAULT;
+
+ if (REG_P (addr) || GET_CODE (addr) == SUBREG)
+ base = addr;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ rtx addends[4], op;
+ int n = 0, i;
+
+ op = addr;
+ do
+ {
+ if (n >= 4)
+ return 0;
+ addends[n++] = XEXP (op, 1);
+ op = XEXP (op, 0);
+ }
+ while (GET_CODE (op) == PLUS);
+ if (n >= 4)
+ return 0;
+ addends[n] = op;
+
+ for (i = n; i >= 0; --i)
+ {
+ op = addends[i];
+ switch (GET_CODE (op))
+ {
+ case MULT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ scale_rtx = XEXP (op, 1);
+ break;
+
+ case UNSPEC:
+ if (XINT (op, 1) == UNSPEC_TP
+ && TARGET_TLS_DIRECT_SEG_REFS
+ && seg == SEG_DEFAULT)
+ seg = TARGET_64BIT ? SEG_FS : SEG_GS;
+ else
+ return 0;
+ break;
+
+ case REG:
+ case SUBREG:
+ if (!base)
+ base = op;
+ else if (!index)
+ index = op;
+ else
+ return 0;
+ break;
+
+ case CONST:
+ case CONST_INT:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ if (disp)
+ return 0;
+ disp = op;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ }
+ else if (GET_CODE (addr) == MULT)
+ {
+ index = XEXP (addr, 0); /* index*scale */
+ scale_rtx = XEXP (addr, 1);
+ }
+ else if (GET_CODE (addr) == ASHIFT)
+ {
+ rtx tmp;
+
+ /* We're called for lea too, which implements ashift on occasion. */
+ index = XEXP (addr, 0);
+ tmp = XEXP (addr, 1);
+ if (!CONST_INT_P (tmp))
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ retval = -1;
+ }
+ else
+ disp = addr; /* displacement */
+
+ /* Extract the integral value of scale. */
+ if (scale_rtx)
+ {
+ if (!CONST_INT_P (scale_rtx))
+ return 0;
+ scale = INTVAL (scale_rtx);
+ }
+
+ base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
+ index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
+
+ /* Allow the arg pointer and the stack pointer as index if there is
+ no scaling.  */
+ if (base_reg && index_reg && scale == 1
+ && (index_reg == arg_pointer_rtx
+ || index_reg == frame_pointer_rtx
+ || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
+ {
+ rtx tmp;
+ tmp = base, base = index, index = tmp;
+ tmp = base_reg, base_reg = index_reg, index_reg = tmp;
+ }
+
+ /* Special case: %ebp cannot be encoded as a base without a displacement. */
+ if ((base_reg == hard_frame_pointer_rtx
+ || base_reg == frame_pointer_rtx
+ || base_reg == arg_pointer_rtx) && !disp)
+ disp = const0_rtx;
+
+ /* Special case: on K6, [%esi] makes the instruction vector-decoded.
+ Avoid this by transforming it to [%esi+0].
+ Reload calls address legitimization without cfun defined, so we need
+ to test cfun for being non-NULL.  */
+ if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
+ && base_reg && !index_reg && !disp
+ && REG_P (base_reg)
+ && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
+ disp = const0_rtx;
+
+ /* Special case: encode reg+reg instead of reg*2. */
+ if (!base && index && scale && scale == 2)
+ base = index, base_reg = index_reg, scale = 1;
+
+ /* Special case: scaling cannot be encoded without base or displacement. */
+ if (!base && !disp && index && scale != 1)
+ disp = const0_rtx;
+
+ out->base = base;
+ out->index = index;
+ out->disp = disp;
+ out->scale = scale;
+ out->seg = seg;
+
+ return retval;
+}
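+
+/* Worked example (an annotation, not part of the original code): for
+ the canonical address rtx of `12(%ebx,%esi,4)',
+
+ (plus:SI (plus:SI (mult:SI (reg:SI esi) (const_int 4))
+ (reg:SI ebx))
+ (const_int 12))
+
+ the code above yields base = %ebx, index = %esi, scale = 4,
+ disp = (const_int 12) and seg = SEG_DEFAULT, with retval == 1.  */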
+
+/* Return the cost of the memory address X.
+ For i386, it is better to use a complex address than to let gcc copy
+ the address into a reg and make a new pseudo.  But not if the address
+ requires two regs; that would mean more pseudos with longer
+ lifetimes.  */
+static int
+ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
+{
+ struct ix86_address parts;
+ int cost = 1;
+ int ok = ix86_decompose_address (x, &parts);
+
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ /* Attempt to minimize number of registers in the address. */
+ if ((parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
+ || (parts.index
+ && (!REG_P (parts.index)
+ || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
+ cost++;
+
+ if (parts.base
+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
+ && parts.index
+ && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
+ && parts.base != parts.index)
+ cost++;
+
+ /* The AMD K6 doesn't like addresses with the ModR/M byte set to
+ 00_xxx_100b, since its predecode logic can't detect the length of
+ such instructions and decoding degenerates to vector decode.
+ Increase the cost of such addresses here.  The penalty is minimally
+ 2 cycles.  It may be worthwhile to split such addresses or even
+ refuse them entirely.
+ Following addressing modes are affected:
+ [base+scale*index]
+ [scale*index+disp]
+ [base+index]
+
+ The first and last cases may be avoidable by explicitly coding the
+ zero into the memory address, but I don't have an AMD K6 machine
+ handy to check this theory.  */
+
+ if (TARGET_K6
+ && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
+ || (parts.disp && !parts.base && parts.index && parts.scale != 1)
+ || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
+ cost += 10;
+
+ return cost;
+}
+
+/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
+ this is used to form addresses of local data when -fPIC is in
+ use.  */
+
+static bool
+darwin_local_data_pic (rtx disp)
+{
+ return (GET_CODE (disp) == UNSPEC
+ && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
+}
+
+/* Determine if a given RTX is a valid constant. We already know this
+ satisfies CONSTANT_P. */
+
+bool
+legitimate_constant_p (rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (!CONST_INT_P (XEXP (x, 1)))
+ return false;
+ x = XEXP (x, 0);
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (x))
+ return true;
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ case UNSPEC_NTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ x = XVECEXP (x, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ default:
+ return false;
+ }
+
+ /* We must have drilled down to a symbol. */
+ if (GET_CODE (x) == LABEL_REF)
+ return true;
+ if (GET_CODE (x) != SYMBOL_REF)
+ return false;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS symbols are never valid. */
+ if (SYMBOL_REF_TLS_MODEL (x))
+ return false;
+
+ /* DLLIMPORT symbols are never valid. */
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && SYMBOL_REF_DLLIMPORT_P (x))
+ return false;
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == TImode
+ && x != CONST0_RTX (TImode)
+ && !TARGET_64BIT)
+ return false;
+ break;
+
+ case CONST_VECTOR:
+ if (x == CONST0_RTX (GET_MODE (x))
+ || (vector_all_ones_operand (x, GET_MODE (x)) && TARGET_SSE))
+ return true;
+ return false;
+
+ default:
+ break;
+ }
+
+ /* Otherwise we handle everything else in the move patterns. */
+ return true;
+}
+
+/* Determine if it's legal to put X into the constant pool. This
+ is not possible for the address of thread-local symbols, which
+ is checked above. */
+
+static bool
+ix86_cannot_force_const_mem (rtx x)
+{
+ /* We can always put integral constants and vectors in memory. */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ return false;
+
+ default:
+ break;
+ }
+ return !legitimate_constant_p (x);
+}
+
+/* Determine if a given RTX is a valid constant address. */
+
+bool
+constant_address_p (rtx x)
+{
+ return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
+}
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+bool
+legitimate_pic_operand_p (rtx x)
+{
+ rtx inner;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ inner = XEXP (x, 0);
+ if (GET_CODE (inner) == PLUS
+ && CONST_INT_P (XEXP (inner, 1)))
+ inner = XEXP (inner, 0);
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (inner) == UNSPEC)
+ switch (XINT (inner, 1))
+ {
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ case UNSPEC_PLTOFF:
+ return TARGET_64BIT;
+ case UNSPEC_TPOFF:
+ x = XVECEXP (inner, 0, 0);
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_MACHOPIC_OFFSET:
+ return legitimate_pic_address_disp_p (x);
+ default:
+ return false;
+ }
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return legitimate_pic_address_disp_p (x);
+
+ default:
+ return true;
+ }
+}
+
+/* Determine if a given CONST RTX is a valid memory displacement
+ in PIC mode. */
+
+int
+legitimate_pic_address_disp_p (rtx disp)
+{
+ bool saw_plus;
+
+ /* In 64bit mode we can allow direct addresses of symbols and labels
+ when they are not dynamic symbols. */
+ if (TARGET_64BIT)
+ {
+ rtx op0 = disp, op1;
+
+ switch (GET_CODE (disp))
+ {
+ case LABEL_REF:
+ return true;
+
+ case CONST:
+ if (GET_CODE (XEXP (disp, 0)) != PLUS)
+ break;
+ op0 = XEXP (XEXP (disp, 0), 0);
+ op1 = XEXP (XEXP (disp, 0), 1);
+ if (!CONST_INT_P (op1)
+ || INTVAL (op1) >= 16*1024*1024
+ || INTVAL (op1) < -16*1024*1024)
+ break;
+ if (GET_CODE (op0) == LABEL_REF)
+ return true;
+ if (GET_CODE (op0) != SYMBOL_REF)
+ break;
+ /* FALLTHRU */
+
+ case SYMBOL_REF:
+ /* TLS references should always be enclosed in UNSPEC. */
+ if (SYMBOL_REF_TLS_MODEL (op0))
+ return false;
+ if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
+ && ix86_cmodel != CM_LARGE_PIC)
+ return true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ if (GET_CODE (disp) != CONST)
+ return 0;
+ disp = XEXP (disp, 0);
+
+ if (TARGET_64BIT)
+ {
+ /* It is unsafe to allow PLUS expressions here. This limits the
+ allowed distance of GOT table references. We should not need
+ these anyway. */
+ if (GET_CODE (disp) != UNSPEC
+ || (XINT (disp, 1) != UNSPEC_GOTPCREL
+ && XINT (disp, 1) != UNSPEC_GOTOFF
+ && XINT (disp, 1) != UNSPEC_PLTOFF))
+ return 0;
+
+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
+ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
+ return 0;
+ return 1;
+ }
+
+ saw_plus = false;
+ if (GET_CODE (disp) == PLUS)
+ {
+ if (!CONST_INT_P (XEXP (disp, 1)))
+ return 0;
+ disp = XEXP (disp, 0);
+ saw_plus = true;
+ }
+
+ if (TARGET_MACHO && darwin_local_data_pic (disp))
+ return 1;
+
+ if (GET_CODE (disp) != UNSPEC)
+ return 0;
+
+ switch (XINT (disp, 1))
+ {
+ case UNSPEC_GOT:
+ if (saw_plus)
+ return false;
+ /* We need to check for both symbols and labels because VxWorks loads
+ text labels with @GOT rather than @GOTOFF. See gotoff_operand for
+ details. */
+ return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
+ case UNSPEC_GOTOFF:
+ /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
+ While the ABI also specifies a 32bit relocation, we don't produce
+ it in the small PIC model at all. */
+ if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
+ && !TARGET_64BIT)
+ return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
+ return false;
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ if (saw_plus)
+ return false;
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
+ case UNSPEC_NTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_DTPOFF:
+ disp = XVECEXP (disp, 0, 0);
+ return (GET_CODE (disp) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ }
+
+ return 0;
+}
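+
+/* For illustration, the displacements accepted above correspond to
+ assembler forms such as "foo@GOT" and "foo@GOTOFF" for 32bit PIC, or
+ "foo@GOTPCREL" for 64bit PIC; a bare "foo" is rejected because it
+ would require a dynamic text relocation. */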
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
+ memory address for an instruction. The MODE argument is the machine mode
+ for the MEM expression that wants to use this address.
+
+ It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
+ convert common non-canonical forms to canonical form so that they will
+ be recognized. */
+
+int
+legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx addr, int strict)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ HOST_WIDE_INT scale;
+ const char *reason = NULL;
+ rtx reason_rtx = NULL_RTX;
+
+ if (ix86_decompose_address (addr, &parts) <= 0)
+ {
+ reason = "decomposition failed";
+ goto report_error;
+ }
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
+ /* Validate base register.
+
+ Don't allow SUBREG's that span more than a word here. It can lead to spill
+ failures when the base is one word out of a two word structure, which is
+ represented internally as a DImode int. */
+
+ if (base)
+ {
+ rtx reg;
+ reason_rtx = base;
+
+ if (REG_P (base))
+ reg = base;
+ else if (GET_CODE (base) == SUBREG
+ && REG_P (SUBREG_REG (base))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (base);
+ else
+ {
+ reason = "base is not a register";
+ goto report_error;
+ }
+
+ if (GET_MODE (base) != Pmode)
+ {
+ reason = "base is not in Pmode";
+ goto report_error;
+ }
+
+ if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
+ {
+ reason = "base is not valid";
+ goto report_error;
+ }
+ }
+
+ /* Validate index register.
+
+ Don't allow SUBREG's that span more than a word here -- same as above. */
+
+ if (index)
+ {
+ rtx reg;
+ reason_rtx = index;
+
+ if (REG_P (index))
+ reg = index;
+ else if (GET_CODE (index) == SUBREG
+ && REG_P (SUBREG_REG (index))
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
+ <= UNITS_PER_WORD)
+ reg = SUBREG_REG (index);
+ else
+ {
+ reason = "index is not a register";
+ goto report_error;
+ }
+
+ if (GET_MODE (index) != Pmode)
+ {
+ reason = "index is not in Pmode";
+ goto report_error;
+ }
+
+ if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
+ || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
+ {
+ reason = "index is not valid";
+ goto report_error;
+ }
+ }
+
+ /* Validate scale factor. */
+ if (scale != 1)
+ {
+ reason_rtx = GEN_INT (scale);
+ if (!index)
+ {
+ reason = "scale without index";
+ goto report_error;
+ }
+
+ if (scale != 2 && scale != 4 && scale != 8)
+ {
+ reason = "scale is not a valid multiplier";
+ goto report_error;
+ }
+ }
+
+ /* Validate displacement. */
+ if (disp)
+ {
+ reason_rtx = disp;
+
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
+ switch (XINT (XEXP (disp, 0), 1))
+ {
+ /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
+ when used. While the ABI also specifies 32bit relocations, we
+ don't produce them at all and use IP relative addressing instead. */
+ case UNSPEC_GOT:
+ case UNSPEC_GOTOFF:
+ gcc_assert (flag_pic);
+ if (!TARGET_64BIT)
+ goto is_legitimate_pic;
+ reason = "64bit address unspec";
+ goto report_error;
+
+ case UNSPEC_GOTPCREL:
+ gcc_assert (flag_pic);
+ goto is_legitimate_pic;
+
+ case UNSPEC_GOTTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_INDNTPOFF:
+ case UNSPEC_NTPOFF:
+ case UNSPEC_DTPOFF:
+ break;
+
+ default:
+ reason = "invalid address unspec";
+ goto report_error;
+ }
+
+ else if (SYMBOLIC_CONST (disp)
+ && (flag_pic
+ || (TARGET_MACHO
+#if TARGET_MACHO
+ && MACHOPIC_INDIRECT
+ && !machopic_operand_p (disp)
+#endif
+ )))
+ {
+
+ is_legitimate_pic:
+ if (TARGET_64BIT && (index || base))
+ {
+ /* foo@dtpoff(%rX) is ok. */
+ if (GET_CODE (disp) != CONST
+ || GET_CODE (XEXP (disp, 0)) != PLUS
+ || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
+ || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
+ || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ {
+ reason = "non-constant pic memory reference";
+ goto report_error;
+ }
+ }
+ else if (! legitimate_pic_address_disp_p (disp))
+ {
+ reason = "displacement is an invalid pic construct";
+ goto report_error;
+ }
+
+ /* This code used to verify that a symbolic pic displacement
+ includes the pic_offset_table_rtx register.
+
+ While this is a good idea, unfortunately these constructs may
+ be created by the "adds using lea" optimization for incorrect
+ code like:
+
+ int a;
+ int foo(int i)
+ {
+ return *(&a+i);
+ }
+
+ This code is nonsensical, but results in addressing the
+ GOT table with a pic_offset_table_rtx base. We can't
+ just refuse it easily, since it gets matched by the
+ "addsi3" pattern, which later gets split to lea in case
+ the output register differs from the input. While this
+ could be handled by a separate addsi pattern for this case
+ that never results in lea, disabling this test seems to be
+ the easier and correct fix for the crash. */
+ }
+ else if (GET_CODE (disp) != LABEL_REF
+ && !CONST_INT_P (disp)
+ && (GET_CODE (disp) != CONST
+ || !legitimate_constant_p (disp))
+ && (GET_CODE (disp) != SYMBOL_REF
+ || !legitimate_constant_p (disp)))
+ {
+ reason = "displacement is not constant";
+ goto report_error;
+ }
+ else if (TARGET_64BIT
+ && !x86_64_immediate_operand (disp, VOIDmode))
+ {
+ reason = "displacement is out of range";
+ goto report_error;
+ }
+ }
+
+ /* Everything looks valid. */
+ return TRUE;
+
+ report_error:
+ return FALSE;
+}
+
+/* Return a unique alias set for the GOT. */
+
+static alias_set_type
+ix86_GOT_alias_set (void)
+{
+ static alias_set_type set = -1;
+ if (set == -1)
+ set = new_alias_set ();
+ return set;
+}
+
+/* Return a legitimate reference for ORIG (an address) using the
+ register REG. If REG is 0, a new pseudo is generated.
+
+ There are two types of references that must be handled:
+
+ 1. Global data references must load the address from the GOT, via
+ the PIC reg. An insn is emitted to do this load, and the reg is
+ returned.
+
+ 2. Static data references, constant pool addresses, and code labels
+ compute the address as an offset from the GOT, whose base is in
+ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
+ differentiate them from global data objects. The returned
+ address is the PIC reg + an unspec constant.
+
+ GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
+ reg also appears in the address. */
+
+static rtx
+legitimize_pic_address (rtx orig, rtx reg)
+{
+ rtx addr = orig;
+ rtx new_rtx = orig;
+ rtx base;
+
+#if TARGET_MACHO
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ /* Use the generic Mach-O PIC machinery. */
+ return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
+ }
+#endif
+
+ if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
+ new_rtx = addr;
+ else if (TARGET_64BIT
+ && ix86_cmodel != CM_SMALL_PIC
+ && gotoff_operand (addr, Pmode))
+ {
+ rtx tmpreg;
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
+ UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
+ }
+ else
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ if (!reg)
+ tmpreg = gen_reg_rtx (Pmode);
+ else
+ tmpreg = reg;
+ emit_move_insn (tmpreg, new_rtx);
+
+ if (reg != 0)
+ {
+ new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
+ tmpreg, 1, OPTAB_DIRECT);
+ new_rtx = reg;
+ }
+ else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
+ }
+ else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
+ {
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
+ UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
+ }
+ else
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
+ /* We can't use @GOTOFF for text labels on VxWorks;
+ see gotoff_operand. */
+ || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
+ {
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
+ {
+ if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
+ return legitimize_dllimport_symbol (addr, true);
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
+ && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
+ {
+ rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
+ }
+ }
+
+ if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
+ {
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ /* Use gen_movsi directly; otherwise the address is loaded
+ into a register for CSE. We don't want to CSE these addresses;
+ instead we CSE addresses from the GOT table, so skip this. */
+ emit_insn (gen_movsi (reg, new_rtx));
+ new_rtx = reg;
+ }
+ else
+ {
+ /* This symbol must be referenced via a load from the
+ Global Offset Table (@GOT). */
+
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ if (TARGET_64BIT)
+ new_rtx = force_reg (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+ new_rtx = gen_const_mem (Pmode, new_rtx);
+ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ if (CONST_INT_P (addr)
+ && !x86_64_immediate_operand (addr, VOIDmode))
+ {
+ if (reg)
+ {
+ emit_move_insn (reg, addr);
+ new_rtx = reg;
+ }
+ else
+ new_rtx = force_reg (Pmode, addr);
+ }
+ else if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+
+ /* We must match the stuff we generated before. Assume the only
+ unspecs that can get here are ours. Not that we could do
+ anything with them anyway.... */
+ if (GET_CODE (addr) == UNSPEC
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == UNSPEC))
+ return orig;
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ /* Check first to see if this is a constant offset from a @GOTOFF
+ symbol reference. */
+ if (gotoff_operand (op0, Pmode)
+ && CONST_INT_P (op1))
+ {
+ if (!TARGET_64BIT)
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
+ UNSPEC_GOTOFF);
+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
+ new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new_rtx);
+ new_rtx = reg;
+ }
+ }
+ else
+ {
+ if (INTVAL (op1) < -16*1024*1024
+ || INTVAL (op1) >= 16*1024*1024)
+ {
+ if (!x86_64_immediate_operand (op1, Pmode))
+ op1 = force_reg (Pmode, op1);
+ new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
+ }
+ }
+ }
+ else
+ {
+ base = legitimize_pic_address (XEXP (addr, 0), reg);
+ new_rtx = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg);
+
+ if (CONST_INT_P (new_rtx))
+ new_rtx = plus_constant (base, INTVAL (new_rtx));
+ else
+ {
+ if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ new_rtx = XEXP (new_rtx, 1);
+ }
+ new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
+ }
+ }
+ }
+ }
+ return new_rtx;
+}
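+
+/* As a rough example of the above, on ia32 a global "foo" is loaded
+ through the GOT, "movl foo@GOT(%ebx), %eax", while a local symbol is
+ addressed relative to the PIC base, "leal foo@GOTOFF(%ebx), %eax"; the
+ RTL built above, (mem (plus pic_reg (const (unspec [foo] GOT)))) and
+ friends, matches into exactly these patterns. */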
+
+/* Load the thread pointer. If TO_REG is true, force it into a register. */
+
+static rtx
+get_thread_pointer (int to_reg)
+{
+ rtx tp, reg, insn;
+
+ tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+ if (!to_reg)
+ return tp;
+
+ reg = gen_reg_rtx (Pmode);
+ insn = gen_rtx_SET (VOIDmode, reg, tp);
+ insn = emit_insn (insn);
+
+ return reg;
+}
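+
+/* On GNU/Linux targets the UNSPEC_TP above typically expands to a read
+ of the thread register through a segment override, e.g.
+ "movl %gs:0, %eax" on ia32 (%fs on x86_64); this is only an
+ illustration, the exact pattern lives in i386.md. */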
+
+/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
+ false if we expect this to be used for a memory address and true if
+ we expect to load the address into a register. */
+
+static rtx
+legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
+{
+ rtx dest, base, off, pic, tp;
+ int type;
+
+ switch (model)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ dest = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
+ insns = get_insns ();
+ end_sequence ();
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, dest, rax, x);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_global_dynamic_64 (dest, x));
+ else
+ emit_insn (gen_tls_global_dynamic_32 (dest, x));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ base = gen_reg_rtx (Pmode);
+ tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
+
+ if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
+ insns = get_insns ();
+ end_sequence ();
+
+ note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
+ note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, base, rax, note);
+ }
+ else if (TARGET_64BIT && TARGET_GNU2_TLS)
+ emit_insn (gen_tls_local_dynamic_base_64 (base));
+ else
+ emit_insn (gen_tls_local_dynamic_base_32 (base));
+
+ if (TARGET_GNU2_TLS)
+ {
+ rtx x = ix86_tls_module_base ();
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV,
+ gen_rtx_MINUS (Pmode, x, tp));
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
+
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ if (TARGET_64BIT)
+ {
+ pic = NULL;
+ type = UNSPEC_GOTNTPOFF;
+ }
+ else if (flag_pic)
+ {
+ if (reload_in_progress)
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+ pic = pic_offset_table_rtx;
+ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
+ }
+ else if (!TARGET_ANY_GNU_TLS)
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ type = UNSPEC_GOTTPOFF;
+ }
+ else
+ {
+ pic = NULL;
+ type = UNSPEC_INDNTPOFF;
+ }
+
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
+ off = gen_rtx_CONST (Pmode, off);
+ if (pic)
+ off = gen_rtx_PLUS (Pmode, pic, off);
+ off = gen_const_mem (Pmode, off);
+ set_mem_alias_set (off, ix86_GOT_alias_set ());
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ off = force_reg (Pmode, off);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
+ (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
+ off = gen_rtx_CONST (Pmode, off);
+
+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
+ {
+ base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
+ return gen_rtx_PLUS (Pmode, base, off);
+ }
+ else
+ {
+ base = get_thread_pointer (true);
+ dest = gen_reg_rtx (Pmode);
+ emit_insn (gen_subsi3 (dest, base, off));
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return dest;
+}
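+
+/* Sketch of the resulting access sequences (standard ELF TLS, slightly
+ simplified): global dynamic calls the tls_get_addr helper with an
+ x@tlsgd argument; local dynamic calls it once for the module base and
+ adds x@dtpoff; initial exec loads x@gottpoff from the GOT and adds the
+ thread pointer; local exec is thread pointer plus x@ntpoff (the
+ non-GNU variant instead subtracts x@tpoff). */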
+
+/* Create or return the unique __imp_DECL dllimport symbol corresponding
+ to symbol DECL. */
+
+static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
+ htab_t dllimport_map;
+
+static tree
+get_dllimport_decl (tree decl)
+{
+ struct tree_map *h, in;
+ void **loc;
+ const char *name;
+ const char *prefix;
+ size_t namelen, prefixlen;
+ char *imp_name;
+ tree to;
+ rtx rtl;
+
+ if (!dllimport_map)
+ dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
+
+ in.hash = htab_hash_pointer (decl);
+ in.base.from = decl;
+ loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
+ h = (struct tree_map *) *loc;
+ if (h)
+ return h->to;
+
+ *loc = h = GGC_NEW (struct tree_map);
+ h->hash = in.hash;
+ h->base.from = decl;
+ h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
+ DECL_ARTIFICIAL (to) = 1;
+ DECL_IGNORED_P (to) = 1;
+ DECL_EXTERNAL (to) = 1;
+ TREE_READONLY (to) = 1;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = targetm.strip_name_encoding (name);
+ prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
+ ? "*__imp_" : "*__imp__";
+ namelen = strlen (name);
+ prefixlen = strlen (prefix);
+ imp_name = (char *) alloca (namelen + prefixlen + 1);
+ memcpy (imp_name, prefix, prefixlen);
+ memcpy (imp_name + prefixlen, name, namelen + 1);
+
+ name = ggc_alloc_string (imp_name, namelen + prefixlen);
+ rtl = gen_rtx_SYMBOL_REF (Pmode, name);
+ SET_SYMBOL_REF_DECL (rtl, to);
+ SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
+
+ rtl = gen_const_mem (Pmode, rtl);
+ set_mem_alias_set (rtl, ix86_GOT_alias_set ());
+
+ SET_DECL_RTL (to, rtl);
+ SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
+
+ return to;
+}
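+
+/* For example, a dllimport reference to "foo" is rewritten above into a
+ load through the import-table slot "__imp__foo" (or "__imp_foo" for
+ fastcall symbols or targets without a user label prefix), matching the
+ symbol the linker creates for imported data. */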
+
+/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
+ true if we require the result be a register. */
+
+static rtx
+legitimize_dllimport_symbol (rtx symbol, bool want_reg)
+{
+ tree imp_decl;
+ rtx x;
+
+ gcc_assert (SYMBOL_REF_DECL (symbol));
+ imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
+
+ x = DECL_RTL (imp_decl);
+ if (want_reg)
+ x = force_reg (Pmode, x);
+ return x;
+}
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the 80386, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in a general reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in a general reg.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address in i386.c for details. */
+
+rtx
+legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
+{
+ int changed = 0;
+ unsigned log;
+
+ log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
+ if (log)
+ return legitimize_tls_address (x, (enum tls_model) log, false);
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
+ {
+ rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
+ (enum tls_model) log, false);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
+ }
+
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
+ {
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
+ return legitimize_dllimport_symbol (x, true);
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
+ {
+ rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
+ }
+ }
+
+ if (flag_pic && SYMBOLIC_CONST (x))
+ return legitimize_pic_address (x, 0);
+
+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
+ if (GET_CODE (x) == ASHIFT
+ && CONST_INT_P (XEXP (x, 1))
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (x, 1));
+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (x) == PLUS)
+ {
+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
+
+ if (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 0), 1));
+ XEXP (x, 0) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
+ GEN_INT (1 << log));
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (x, 1), 1))
+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
+ {
+ changed = 1;
+ log = INTVAL (XEXP (XEXP (x, 1), 1));
+ XEXP (x, 1) = gen_rtx_MULT (Pmode,
+ force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
+ GEN_INT (1 << log));
+ }
+
+ /* Put multiply first if it isn't already. */
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ rtx tmp = XEXP (x, 0);
+ XEXP (x, 0) = XEXP (x, 1);
+ XEXP (x, 1) = tmp;
+ changed = 1;
+ }
+
+ /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
+ created by virtual register instantiation, register elimination, and
+ similar optimizations. */
+ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (x, 0),
+ XEXP (XEXP (x, 1), 0)),
+ XEXP (XEXP (x, 1), 1));
+ }
+
+ /* Canonicalize
+ (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
+ into (plus (plus (mult (reg) (const)) (reg)) (const)). */
+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ rtx constant;
+ rtx other = NULL_RTX;
+
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ constant = XEXP (x, 1);
+ other = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ }
+ else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
+ {
+ constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
+ other = XEXP (x, 1);
+ }
+ else
+ constant = 0;
+
+ if (constant)
+ {
+ changed = 1;
+ x = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
+ XEXP (XEXP (XEXP (x, 0), 1), 0)),
+ plus_constant (other, INTVAL (constant)));
+ }
+ }
+
+ if (changed && legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (GET_CODE (XEXP (x, 0)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
+ }
+
+ if (GET_CODE (XEXP (x, 1)) == MULT)
+ {
+ changed = 1;
+ XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
+ }
+
+ if (changed
+ && REG_P (XEXP (x, 1))
+ && REG_P (XEXP (x, 0)))
+ return x;
+
+ if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
+ {
+ changed = 1;
+ x = legitimize_pic_address (x, 0);
+ }
+
+ if (changed && legitimate_address_p (mode, x, FALSE))
+ return x;
+
+ if (REG_P (XEXP (x, 0)))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 1), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 1) = temp;
+ return x;
+ }
+
+ else if (REG_P (XEXP (x, 1)))
+ {
+ rtx temp = gen_reg_rtx (Pmode);
+ rtx val = force_operand (XEXP (x, 0), temp);
+ if (val != temp)
+ emit_move_insn (temp, val);
+
+ XEXP (x, 0) = temp;
+ return x;
+ }
+ }
+
+ return x;
+}
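+
+/* Example of the canonicalization performed above: an address such as
+ (plus (ashift reg 2) reg2) is rewritten to (plus (mult reg 4) reg2),
+ which GO_IF_LEGITIMATE_ADDRESS recognizes and which prints as
+ "(%reg2,%reg,4)" in AT&T syntax. */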
+
+/* Print an integer constant expression in assembler syntax. Addition
+ and subtraction are the only arithmetic that may appear in these
+ expressions. FILE is the stdio stream to write to, X is the rtx, and
+ CODE is the operand print code from the output string. */
+
+static void
+output_pic_addr_const (FILE *file, rtx x, int code)
+{
+ char buf[256];
+
+ switch (GET_CODE (x))
+ {
+ case PC:
+ gcc_assert (flag_pic);
+ putc ('.', file);
+ break;
+
+ case SYMBOL_REF:
+ if (! TARGET_MACHO || TARGET_64BIT)
+ output_addr_const (file, x);
+ else
+ {
+ const char *name = XSTR (x, 0);
+
+ /* Mark the decl as referenced so that cgraph will
+ output the function. */
+ if (SYMBOL_REF_DECL (x))
+ mark_decl_referenced (SYMBOL_REF_DECL (x));
+
+#if TARGET_MACHO
+ if (MACHOPIC_INDIRECT
+ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
+ name = machopic_indirection_name (x, /*stub_p=*/true);
+#endif
+ assemble_name (file, name);
+ }
+ if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
+ fputs ("@PLT", file);
+ break;
+
+ case LABEL_REF:
+ x = XEXP (x, 0);
+ /* FALLTHRU */
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+ assemble_name (asm_out_file, buf);
+ break;
+
+ case CONST_INT:
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case CONST:
+ /* This used to output parentheses around the expression,
+ but that does not work on the 386 (either ATT or BSD assembler). */
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ {
+ /* We can use %d if the number is <32 bits and positive. */
+ if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
+ fprintf (file, "0x%lx%08lx",
+ (unsigned long) CONST_DOUBLE_HIGH (x),
+ (unsigned long) CONST_DOUBLE_LOW (x));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
+ }
+ else
+ /* We can't handle floating point constants;
+ PRINT_OPERAND must handle them. */
+ output_operand_lossage ("floating constant misused");
+ break;
+
+ case PLUS:
+ /* Some assemblers need integer constants to appear first. */
+ if (CONST_INT_P (XEXP (x, 0)))
+ {
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ }
+ else
+ {
+ gcc_assert (CONST_INT_P (XEXP (x, 1)));
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ putc ('+', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ }
+ break;
+
+ case MINUS:
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
+ output_pic_addr_const (file, XEXP (x, 0), code);
+ putc ('-', file);
+ output_pic_addr_const (file, XEXP (x, 1), code);
+ if (!TARGET_MACHO)
+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
+ break;
+
+ case UNSPEC:
+ gcc_assert (XVECLEN (x, 0) == 1);
+ output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOT:
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_PLTOFF:
+ fputs ("@PLTOFF", file);
+ break;
+ case UNSPEC_GOTPCREL:
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
+ break;
+ case UNSPEC_GOTTPOFF:
+ /* FIXME: This might be @TPOFF in Sun ld too. */
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_NTPOFF:
+ if (TARGET_64BIT)
+ fputs ("@TPOFF", file);
+ else
+ fputs ("@NTPOFF", file);
+ break;
+ case UNSPEC_DTPOFF:
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ if (TARGET_64BIT)
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
+ else
+ fputs ("@GOTNTPOFF", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ fputs ("@INDNTPOFF", file);
+ break;
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ break;
+#endif
+ default:
+ output_operand_lossage ("invalid UNSPEC as operand");
+ break;
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid expression as operand");
+ }
+}
+
+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
+ We need to emit DTP-relative relocations. */
+
+static void ATTRIBUTE_UNUSED
+i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
+{
+ fputs (ASM_LONG, file);
+ output_addr_const (file, x);
+ fputs ("@DTPOFF", file);
+ switch (size)
+ {
+ case 4:
+ break;
+ case 8:
+ fputs (", 0", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
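+
+/* Assuming ASM_LONG emits a ".long" directive, the above produces e.g.
+ ".long x@DTPOFF" for size 4 and ".long x@DTPOFF, 0" for size 8. */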
+
+/* Return true if X is a representation of the PIC register. This copes
+ with calls from ix86_find_base_term, where the register might have
+ been replaced by a cselib value. */
+
+static bool
+ix86_pic_register_p (rtx x)
+{
+ if (GET_CODE (x) == VALUE)
+ return (pic_offset_table_rtx
+ && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+ else
+ return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize PIC+GOTOFF and turn it back
+ into a direct symbol reference.
+
+ On Darwin, this is necessary to avoid a crash, because Darwin
+ has a different PIC label for each routine but the DWARF debugging
+ information is not associated with any particular routine, so it's
+ necessary to remove references to the PIC label from RTL stored by
+ the DWARF output code. */
+
+static rtx
+ix86_delegitimize_address (rtx orig_x)
+{
+ rtx x = orig_x;
+ /* reg_addend is NULL or a multiple of some register. */
+ rtx reg_addend = NULL_RTX;
+ /* const_addend is NULL or a const_int. */
+ rtx const_addend = NULL_RTX;
+ /* This is the result, or NULL. */
+ rtx result = NULL_RTX;
+
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST
+ || GET_CODE (XEXP (x, 0)) != UNSPEC
+ || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
+ || !MEM_P (orig_x))
+ return orig_x;
+ return XVECEXP (XEXP (x, 0), 0, 0);
+ }
+
+ if (GET_CODE (x) != PLUS
+ || GET_CODE (XEXP (x, 1)) != CONST)
+ return orig_x;
+
+ if (ix86_pic_register_p (XEXP (x, 0)))
+ /* %ebx + GOT/GOTOFF */
+ ;
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ /* %ebx + %reg * scale + GOT/GOTOFF */
+ reg_addend = XEXP (x, 0);
+ if (ix86_pic_register_p (XEXP (reg_addend, 0)))
+ reg_addend = XEXP (reg_addend, 1);
+ else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
+ reg_addend = XEXP (reg_addend, 0);
+ else
+ return orig_x;
+ if (!REG_P (reg_addend)
+ && GET_CODE (reg_addend) != MULT
+ && GET_CODE (reg_addend) != ASHIFT)
+ return orig_x;
+ }
+ else
+ return orig_x;
+
+ x = XEXP (XEXP (x, 1), 0);
+ if (GET_CODE (x) == PLUS
+ && CONST_INT_P (XEXP (x, 1)))
+ {
+ const_addend = XEXP (x, 1);
+ x = XEXP (x, 0);
+ }
+
+ if (GET_CODE (x) == UNSPEC
+ && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
+ || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
+ result = XVECEXP (x, 0, 0);
+
+ if (TARGET_MACHO && darwin_local_data_pic (x)
+ && !MEM_P (orig_x))
+ result = XVECEXP (x, 0, 0);
+
+ if (! result)
+ return orig_x;
+
+ if (const_addend)
+ result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
+ if (reg_addend)
+ result = gen_rtx_PLUS (Pmode, reg_addend, result);
+ return result;
+}
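+
+/* For example, the above turns (plus pic_reg (const (unspec [foo]
+ GOTOFF))) back into plain "foo", and in 64bit mode strips the GOTPCREL
+ wrapper from a GOT load, so the debug info refers to the symbol
+ itself. */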
+
+/* If X is a machine specific address (i.e. a symbol or label being
+ referenced as a displacement from the GOT implemented using an
+ UNSPEC), then return the base term. Otherwise return X. */
+
+rtx
+ix86_find_base_term (rtx x)
+{
+ rtx term;
+
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (x) != CONST)
+ return x;
+ term = XEXP (x, 0);
+ if (GET_CODE (term) == PLUS
+ && (CONST_INT_P (XEXP (term, 1))
+ || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
+ term = XEXP (term, 0);
+ if (GET_CODE (term) != UNSPEC
+ || XINT (term, 1) != UNSPEC_GOTPCREL)
+ return x;
+
+ return XVECEXP (term, 0, 0);
+ }
+
+ return ix86_delegitimize_address (x);
+}
+
+static void
+put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
+ int fp, FILE *file)
+{
+ const char *suffix;
+
+ if (mode == CCFPmode || mode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
+ code = ix86_fp_compare_code_to_integer (code);
+ mode = CCmode;
+ }
+ if (reverse)
+ code = reverse_condition (code);
+
+ switch (code)
+ {
+ case EQ:
+ switch (mode)
+ {
+ case CCAmode:
+ suffix = "a";
+ break;
+
+ case CCCmode:
+ suffix = "c";
+ break;
+
+ case CCOmode:
+ suffix = "o";
+ break;
+
+ case CCSmode:
+ suffix = "s";
+ break;
+
+ default:
+ suffix = "e";
+ }
+ break;
+ case NE:
+ switch (mode)
+ {
+ case CCAmode:
+ suffix = "na";
+ break;
+
+ case CCCmode:
+ suffix = "nc";
+ break;
+
+ case CCOmode:
+ suffix = "no";
+ break;
+
+ case CCSmode:
+ suffix = "ns";
+ break;
+
+ default:
+ suffix = "ne";
+ }
+ break;
+ case GT:
+ gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
+ suffix = "g";
+ break;
+ case GTU:
+ /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
+ Those same assemblers have the same but opposite lossage on cmov. */
+ if (mode == CCmode)
+ suffix = fp ? "nbe" : "a";
+ else if (mode == CCCmode)
+ suffix = "b";
+ else
+ gcc_unreachable ();
+ break;
+ case LT:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "s";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "l";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case LTU:
+ gcc_assert (mode == CCmode || mode == CCCmode);
+ suffix = "b";
+ break;
+ case GE:
+ switch (mode)
+ {
+ case CCNOmode:
+ case CCGOCmode:
+ suffix = "ns";
+ break;
+
+ case CCmode:
+ case CCGCmode:
+ suffix = "ge";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case GEU:
+ /* ??? As above. */
+ gcc_assert (mode == CCmode || mode == CCCmode);
+ suffix = fp ? "nb" : "ae";
+ break;
+ case LE:
+ gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
+ suffix = "le";
+ break;
+ case LEU:
+ /* ??? As above. */
+ if (mode == CCmode)
+ suffix = "be";
+ else if (mode == CCCmode)
+ suffix = fp ? "nb" : "ae";
+ else
+ gcc_unreachable ();
+ break;
+ case UNORDERED:
+ suffix = fp ? "u" : "p";
+ break;
+ case ORDERED:
+ suffix = fp ? "nu" : "np";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ fputs (suffix, file);
+}
+
+/* Print the name of register X to FILE based on its machine mode and number.
+ If CODE is 'w', pretend the mode is HImode.
+ If CODE is 'b', pretend the mode is QImode.
+ If CODE is 'k', pretend the mode is SImode.
+ If CODE is 'q', pretend the mode is DImode.
+ If CODE is 'x', pretend the mode is V4SFmode.
+ If CODE is 't', pretend the mode is V8SFmode.
+ If CODE is 'h', pretend the reg is the 'high' byte register.
+ If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
+ If CODE is 'd', duplicate the operand for AVX instruction.
+ */
+
+void
+print_reg (rtx x, int code, FILE *file)
+{
+ const char *reg;
+ bool duplicated = code == 'd' && TARGET_AVX;
+
+ gcc_assert (x == pc_rtx
+ || (REGNO (x) != ARG_POINTER_REGNUM
+ && REGNO (x) != FRAME_POINTER_REGNUM
+ && REGNO (x) != FLAGS_REG
+ && REGNO (x) != FPSR_REG
+ && REGNO (x) != FPCR_REG));
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('%', file);
+
+ if (x == pc_rtx)
+ {
+ gcc_assert (TARGET_64BIT);
+ fputs ("rip", file);
+ return;
+ }
+
+ if (code == 'w' || MMX_REG_P (x))
+ code = 2;
+ else if (code == 'b')
+ code = 1;
+ else if (code == 'k')
+ code = 4;
+ else if (code == 'q')
+ code = 8;
+ else if (code == 'y')
+ code = 3;
+ else if (code == 'h')
+ code = 0;
+ else if (code == 'x')
+ code = 16;
+ else if (code == 't')
+ code = 32;
+ else
+ code = GET_MODE_SIZE (GET_MODE (x));
+
+ /* Irritatingly, AMD extended registers use a different naming
+ convention from the normal registers. */
+ if (REX_INT_REG_P (x))
+ {
+ gcc_assert (TARGET_64BIT);
+ switch (code)
+ {
+ case 0:
+ error ("extended registers have no high halves");
+ break;
+ case 1:
+ fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 2:
+ fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 4:
+ fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ case 8:
+ fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
+ break;
+ default:
+ error ("unsupported operand size for extended register");
+ break;
+ }
+ return;
+ }
+
+ reg = NULL;
+ switch (code)
+ {
+ case 3:
+ if (STACK_TOP_P (x))
+ {
+ reg = "st(0)";
+ break;
+ }
+ /* FALLTHRU */
+ case 8:
+ case 4:
+ case 12:
+ if (! ANY_FP_REG_P (x))
+ putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
+ /* FALLTHRU */
+ case 16:
+ case 2:
+ normal:
+ reg = hi_reg_name[REGNO (x)];
+ break;
+ case 1:
+ if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
+ goto normal;
+ reg = qi_reg_name[REGNO (x)];
+ break;
+ case 0:
+ if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
+ goto normal;
+ reg = qi_high_reg_name[REGNO (x)];
+ break;
+ case 32:
+ if (SSE_REG_P (x))
+ {
+ gcc_assert (!duplicated);
+ putc ('y', file);
+ fputs (hi_reg_name[REGNO (x)] + 1, file);
+ return;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ fputs (reg, file);
+ if (duplicated)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, ", %%%s", reg);
+ else
+ fprintf (file, ", %s", reg);
+ }
+}
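+
+/* For example, for operand register %eax (reg 0), code 'b' prints
+ "%al", 'w' prints "%ax", 'k' prints "%eax" and 'q' prints "%rax" (the
+ latter only in 64bit mode); with no code the name follows the operand
+ mode. */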
+
+/* Locate some local-dynamic symbol still in use by this function
+ so that we can print its name in some tls_local_dynamic_base
+ pattern. */
+
+static int
+get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *px;
+
+ if (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
+ {
+ cfun->machine->some_ld_name = XSTR (x, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static const char *
+get_some_local_dynamic_name (void)
+{
+ rtx insn;
+
+ if (cfun->machine->some_ld_name)
+ return cfun->machine->some_ld_name;
+
+ for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+ if (INSN_P (insn)
+ && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+ return cfun->machine->some_ld_name;
+
+ gcc_unreachable ();
+}
+
+/* Meaning of CODE:
+ L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
+ C -- print opcode suffix for set/cmov insn.
+ c -- like C, but print reversed condition
+ E,e -- likewise, but for compare-and-branch fused insn.
+ F,f -- likewise, but for floating-point.
+ O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
+ otherwise nothing
+ R -- print the prefix for register names.
+ z -- print the opcode suffix for the size of the current operand.
+ * -- print a star (in certain assembler syntax)
+ A -- print an absolute memory reference.
+ w -- print the operand as if it's a "word" (HImode) even if it isn't.
+ s -- print a shift double count, followed by the assembler's argument
+ delimiter.
+ b -- print the QImode name of the register for the indicated operand.
+ %b0 would print %al if operands[0] is reg 0.
+ w -- likewise, print the HImode name of the register.
+ k -- likewise, print the SImode name of the register.
+ q -- likewise, print the DImode name of the register.
+ x -- likewise, print the V4SFmode name of the register.
+ t -- likewise, print the V8SFmode name of the register.
+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+ y -- print "st(0)" instead of "st" as a register.
+ d -- print duplicated register operand for AVX instruction.
+ D -- print condition for SSE cmp instruction.
+ P -- if PIC, print an @PLT suffix.
+ X -- don't print any sort of PIC '@' suffix for a symbol.
+ & -- print some in-use local-dynamic symbol name.
+ H -- print a memory address offset by 8; used for sse high-parts
+ Y -- print condition for SSE5 com* instruction.
+ + -- print a branch hint as 'cs' or 'ds' prefix
+ ; -- print a semicolon (after prefixes due to bug in older gas).
+ */
+
+void
+print_operand (FILE *file, rtx x, int code)
+{
+ if (code)
+ {
+ switch (code)
+ {
+ case '*':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('*', file);
+ return;
+
+ case '&':
+ assemble_name (file, get_some_local_dynamic_name ());
+ return;
+
+ case 'A':
+ switch (ASSEMBLER_DIALECT)
+ {
+ case ASM_ATT:
+ putc ('*', file);
+ break;
+
+ case ASM_INTEL:
+ /* Intel syntax. For absolute addresses, registers should not
+ be surrounded by brackets. */
+ if (!REG_P (x))
+ {
+ putc ('[', file);
+ PRINT_OPERAND (file, x, 0);
+ putc (']', file);
+ return;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ PRINT_OPERAND (file, x, 0);
+ return;
+
+ case 'L':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'W':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('w', file);
+ return;
+
+ case 'B':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('b', file);
+ return;
+
+ case 'Q':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('l', file);
+ return;
+
+ case 'S':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('s', file);
+ return;
+
+ case 'T':
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('t', file);
+ return;
+
+ case 'z':
+ /* 387 opcodes don't get size suffixes if the operands are
+ registers. */
+ if (STACK_REG_P (x))
+ return;
+
+ /* Likewise if using Intel opcodes. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return;
+
+ /* Derive the opcode suffix from the size of the operand. */
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1:
+ putc ('b', file);
+ return;
+
+ case 2:
+ if (MEM_P (x))
+ {
+#ifdef HAVE_GAS_FILDS_FISTS
+ putc ('s', file);
+#endif
+ return;
+ }
+ else
+ putc ('w', file);
+ return;
+
+ case 4:
+ if (GET_MODE (x) == SFmode)
+ {
+ putc ('s', file);
+ return;
+ }
+ else
+ putc ('l', file);
+ return;
+
+ case 12:
+ case 16:
+ putc ('t', file);
+ return;
+
+ case 8:
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ if (MEM_P (x))
+ {
+#ifdef GAS_MNEMONICS
+ putc ('q', file);
+#else
+ putc ('l', file);
+ putc ('l', file);
+#endif
+ }
+ else
+ putc ('q', file);
+ }
+ else
+ putc ('l', file);
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ case 'd':
+ case 'b':
+ case 'w':
+ case 'k':
+ case 'q':
+ case 'h':
+ case 't':
+ case 'y':
+ case 'x':
+ case 'X':
+ case 'P':
+ break;
+
+ case 's':
+ if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
+ {
+ PRINT_OPERAND (file, x, 0);
+ fputs (", ", file);
+ }
+ return;
+
+ case 'D':
+ /* A little bit of braindamage here. The SSE compare instructions
+ use completely different names for the comparisons than the
+ fp conditional moves do. */
+ if (TARGET_AVX)
+ {
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case UNEQ:
+ fputs ("eq_us", file);
+ break;
+ case LT:
+ fputs ("lt", file);
+ break;
+ case UNLT:
+ fputs ("nge", file);
+ break;
+ case LE:
+ fputs ("le", file);
+ break;
+ case UNLE:
+ fputs ("ngt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ fputs ("neq", file);
+ break;
+ case LTGT:
+ fputs ("neq_oq", file);
+ break;
+ case GE:
+ fputs ("ge", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case GT:
+ fputs ("gt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
+ return;
+ }
+ }
+ else
+ {
+ switch (GET_CODE (x))
+ {
+ case EQ:
+ case UNEQ:
+ fputs ("eq", file);
+ break;
+ case LT:
+ case UNLT:
+ fputs ("lt", file);
+ break;
+ case LE:
+ case UNLE:
+ fputs ("le", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case NE:
+ case LTGT:
+ fputs ("neq", file);
+ break;
+ case UNGE:
+ case GE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ case GT:
+ fputs ("nle", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
+ return;
+ }
+ }
+ return;
+ case 'O':
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ switch (GET_MODE (x))
+ {
+ case HImode: putc ('w', file); break;
+ case SImode:
+ case SFmode: putc ('l', file); break;
+ case DImode:
+ case DFmode: putc ('q', file); break;
+ default: gcc_unreachable ();
+ }
+ putc ('.', file);
+ }
+#endif
+ return;
+ case 'C':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand code "
+ "'C'");
+ return;
+ }
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
+ return;
+ case 'F':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand code "
+ "'F'");
+ return;
+ }
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
+ return;
+
+ /* Like above, but reverse condition */
+ case 'c':
+ /* Check to see if argument to %c is really a constant
+ and not a condition code which needs to be reversed. */
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand "
+ "code 'c'");
+ return;
+ }
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
+ return;
+ case 'f':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand "
+ "code 'f'");
+ return;
+ }
+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('.', file);
+#endif
+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
+ return;
+
+ case 'E':
+ put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
+ return;
+
+ case 'e':
+ put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
+ return;
+
+ case 'H':
+ /* It doesn't actually matter what mode we use here, as we're
+ only going to use this for printing. */
+ x = adjust_address_nv (x, DImode, 8);
+ break;
+
+ case '+':
+ {
+ rtx x;
+
+ if (!optimize
+ || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
+ return;
+
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ if (pred_val < REG_BR_PROB_BASE * 45 / 100
+ || pred_val > REG_BR_PROB_BASE * 55 / 100)
+ {
+ int taken = pred_val > REG_BR_PROB_BASE / 2;
+ int cputaken = final_forward_branch_p (current_output_insn) == 0;
+
+ /* Emit hints only in case the default branch prediction
+ heuristics would fail. */
+ if (taken != cputaken)
+ {
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken)
+ fputs ("ds ; ", file);
+ else
+ fputs ("cs ; ", file);
+ }
+ }
+ }
+ return;
+ }
+
+ case 'Y':
+ switch (GET_CODE (x))
+ {
+ case NE:
+ fputs ("neq", file);
+ break;
+ case EQ:
+ fputs ("eq", file);
+ break;
+ case GE:
+ case GEU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
+ break;
+ case GT:
+ case GTU:
+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
+ break;
+ case LE:
+ case LEU:
+ fputs ("le", file);
+ break;
+ case LT:
+ case LTU:
+ fputs ("lt", file);
+ break;
+ case UNORDERED:
+ fputs ("unord", file);
+ break;
+ case ORDERED:
+ fputs ("ord", file);
+ break;
+ case UNEQ:
+ fputs ("ueq", file);
+ break;
+ case UNGE:
+ fputs ("nlt", file);
+ break;
+ case UNGT:
+ fputs ("nle", file);
+ break;
+ case UNLE:
+ fputs ("ule", file);
+ break;
+ case UNLT:
+ fputs ("ult", file);
+ break;
+ case LTGT:
+ fputs ("une", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
+ return;
+ }
+ return;
+
+ case ';':
+#if TARGET_MACHO
+ fputs (" ; ", file);
+#else
+ fputc (' ', file);
+#endif
+ return;
+
+ default:
+ output_operand_lossage ("invalid operand code '%c'", code);
+ }
+ }
+
+ if (REG_P (x))
+ print_reg (x, code, file);
+
+ else if (MEM_P (x))
+ {
+ /* No `byte ptr' prefix for call instructions or BLKmode operands. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
+ && GET_MODE (x) != BLKmode)
+ {
+ const char * size;
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1: size = "BYTE"; break;
+ case 2: size = "WORD"; break;
+ case 4: size = "DWORD"; break;
+ case 8: size = "QWORD"; break;
+ case 12: size = "XWORD"; break;
+ case 16:
+ if (GET_MODE (x) == XFmode)
+ size = "XWORD";
+ else
+ size = "XMMWORD";
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Check for explicit size override (codes 'b', 'w' and 'k') */
+ if (code == 'b')
+ size = "BYTE";
+ else if (code == 'w')
+ size = "WORD";
+ else if (code == 'k')
+ size = "DWORD";
+
+ fputs (size, file);
+ fputs (" PTR ", file);
+ }
+
+ x = XEXP (x, 0);
+ /* Avoid (%rip) for call operands. */
+ if (CONSTANT_ADDRESS_P (x) && code == 'P'
+ && !CONST_INT_P (x))
+ output_addr_const (file, x);
+ else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
+ output_operand_lossage ("invalid constraints for operand");
+ else
+ output_address (x);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE r;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+ REAL_VALUE_TO_TARGET_SINGLE (r, l);
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ fprintf (file, "0x%08lx", (long unsigned int) l);
+ }
+
+ /* These float cases don't actually occur as immediate operands. */
+ else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fprintf (file, "%s", dstr);
+ }
+
+ else if (GET_CODE (x) == CONST_DOUBLE
+ && GET_MODE (x) == XFmode)
+ {
+ char dstr[30];
+
+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
+ fprintf (file, "%s", dstr);
+ }
+
+ else
+ {
+ /* We have patterns that allow zero sets of memory, for instance.
+ In 64-bit mode, we should probably support all 8-byte vectors,
+ since we can in fact encode that into an immediate. */
+ if (GET_CODE (x) == CONST_VECTOR)
+ {
+ gcc_assert (x == CONST0_RTX (GET_MODE (x)));
+ x = const0_rtx;
+ }
+
+ if (code != 'P')
+ {
+ if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ }
+ else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
+ || GET_CODE (x) == LABEL_REF)
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('$', file);
+ else
+ fputs ("OFFSET FLAT:", file);
+ }
+ }
+ if (CONST_INT_P (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ else if (flag_pic)
+ output_pic_addr_const (file, x, code);
+ else
+ output_addr_const (file, x);
+ }
+}
+
+/* Print a memory operand whose address is ADDR. */
+
+void
+print_operand_address (FILE *file, rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int scale;
+ int ok = ix86_decompose_address (addr, &parts);
+
+ gcc_assert (ok);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ scale = parts.scale;
+
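+  /* AT&T syntax prints the address as disp(base,index,scale); Intel
+     syntax prints [base+index*scale+disp].  Both forms are assembled
+     from the decomposed parts below.  */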
+ switch (parts.seg)
+ {
+ case SEG_DEFAULT:
+ break;
+ case SEG_FS:
+ case SEG_GS:
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ putc ('%', file);
+ fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+  /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
+ if (TARGET_64BIT && !base && !index)
+ {
+ rtx symbol = disp;
+
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
+ symbol = XEXP (XEXP (disp, 0), 0);
+
+ if (GET_CODE (symbol) == LABEL_REF
+ || (GET_CODE (symbol) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (symbol) == 0))
+ base = pc_rtx;
+ }
+ if (!base && !index)
+ {
+ /* Displacement only requires special attention. */
+
+ if (CONST_INT_P (disp))
+ {
+ if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
+ fputs ("ds:", file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
+ }
+ else if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else
+ output_addr_const (file, disp);
+ }
+ else
+ {
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ {
+ if (disp)
+ {
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('(', file);
+ if (base)
+ print_reg (base, 0, file);
+ if (index)
+ {
+ putc (',', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, ",%d", scale);
+ }
+ putc (')', file);
+ }
+ else
+ {
+ rtx offset = NULL_RTX;
+
+ if (disp)
+ {
+ /* Pull out the offset of a symbol; print any symbol itself. */
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
+ {
+ offset = XEXP (XEXP (disp, 0), 1);
+ disp = gen_rtx_CONST (VOIDmode,
+ XEXP (XEXP (disp, 0), 0));
+ }
+
+ if (flag_pic)
+ output_pic_addr_const (file, disp, 0);
+ else if (GET_CODE (disp) == LABEL_REF)
+ output_asm_label (disp);
+ else if (CONST_INT_P (disp))
+ offset = disp;
+ else
+ output_addr_const (file, disp);
+ }
+
+ putc ('[', file);
+ if (base)
+ {
+ print_reg (base, 0, file);
+ if (offset)
+ {
+ if (INTVAL (offset) >= 0)
+ putc ('+', file);
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ }
+ }
+ else if (offset)
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
+ else
+ putc ('0', file);
+
+ if (index)
+ {
+ putc ('+', file);
+ print_reg (index, 0, file);
+ if (scale != 1)
+ fprintf (file, "*%d", scale);
+ }
+ putc (']', file);
+ }
+ }
+}
+
+bool
+output_addr_const_extra (FILE *file, rtx x)
+{
+ rtx op;
+
+ if (GET_CODE (x) != UNSPEC)
+ return false;
+
+ op = XVECEXP (x, 0, 0);
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_GOTTPOFF:
+ output_addr_const (file, op);
+ /* FIXME: This might be @TPOFF in Sun ld. */
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ output_addr_const (file, op);
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_NTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs ("@TPOFF", file);
+ else
+ fputs ("@NTPOFF", file);
+ break;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, op);
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTNTPOFF:
+ output_addr_const (file, op);
+ if (TARGET_64BIT)
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
+ else
+ fputs ("@GOTNTPOFF", file);
+ break;
+ case UNSPEC_INDNTPOFF:
+ output_addr_const (file, op);
+ fputs ("@INDNTPOFF", file);
+ break;
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ output_addr_const (file, op);
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ break;
+#endif
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/* Split one or more DImode RTL references into pairs of SImode
+ references. The RTL can be REG, offsettable MEM, integer constant, or
+ CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
+ split and "num" is its length. lo_half and hi_half are output arrays
+ that parallel "operands". */
+
+void
+split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses,
+	 but we still have to handle them.  */
+ if (MEM_P (op))
+ {
+ lo_half[num] = adjust_address (op, SImode, 0);
+ hi_half[num] = adjust_address (op, SImode, 4);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 0);
+ hi_half[num] = simplify_gen_subreg (SImode, op,
+ GET_MODE (op) == VOIDmode
+ ? DImode : GET_MODE (op), 4);
+ }
+ }
+}
+
+/* Split one or more TImode RTL references into pairs of DImode
+   references.  The RTL can be REG, offsettable MEM, integer constant, or
+   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
+   split and "num" is its length.  lo_half and hi_half are output arrays
+   that parallel "operands".  */
+
+void
+split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
+{
+ while (num--)
+ {
+ rtx op = operands[num];
+
+      /* simplify_subreg refuses to split volatile memory addresses, but we
+	 still have to handle them.  */
+ if (MEM_P (op))
+ {
+ lo_half[num] = adjust_address (op, DImode, 0);
+ hi_half[num] = adjust_address (op, DImode, 8);
+ }
+ else
+ {
+ lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
+ hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
+ }
+ }
+}
+
+/* Output code to perform a 387 binary operation in INSN, one of PLUS,
+ MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
+ is the expression of the binary operation. The output may either be
+ emitted here, or returned to the caller, like all output_* functions.
+
+ There is no guarantee that the operands are the same mode, as they
+ might be within FLOAT or FLOAT_EXTEND expressions. */
+
+#ifndef SYSV386_COMPAT
+/* Set to 1 for compatibility with brain-damaged assemblers. No-one
+ wants to fix the assemblers because that causes incompatibility
+ with gcc. No-one wants to fix gcc because that causes
+ incompatibility with assemblers... You can use the option of
+ -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
+#define SYSV386_COMPAT 1
+#endif
+
+const char *
+output_387_binary_op (rtx insn, rtx *operands)
+{
+ static char buf[40];
+ const char *p;
+ const char *ssep;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
+
+#ifdef ENABLE_CHECKING
+  /* Even if we do not want to check the inputs, this documents input
+     constraints, which helps in understanding the following code.  */
+ if (STACK_REG_P (operands[0])
+ && ((REG_P (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
+ || (REG_P (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
+ && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
+ ; /* ok */
+ else
+ gcc_assert (is_sse);
+#endif
+
+ switch (GET_CODE (operands[3]))
+ {
+ case PLUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fiadd";
+ else
+ p = "fadd";
+ ssep = "vadd";
+ break;
+
+ case MINUS:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fisub";
+ else
+ p = "fsub";
+ ssep = "vsub";
+ break;
+
+ case MULT:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fimul";
+ else
+ p = "fmul";
+ ssep = "vmul";
+ break;
+
+ case DIV:
+ if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
+ p = "fidiv";
+ else
+ p = "fdiv";
+ ssep = "vdiv";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
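+  /* In the templates below, the {att|intel} braces select the AT&T or
+     Intel operand order according to ASSEMBLER_DIALECT.  */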
+ if (is_sse)
+ {
+ if (TARGET_AVX)
+ {
+ strcpy (buf, ssep);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
+ else
+ strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
+ }
+ else
+ {
+ strcpy (buf, ssep + 1);
+ if (GET_MODE (operands[0]) == SFmode)
+ strcat (buf, "ss\t{%2, %0|%0, %2}");
+ else
+ strcat (buf, "sd\t{%2, %0|%0, %2}");
+ }
+ return buf;
+ }
+ strcpy (buf, p);
+
+ switch (GET_CODE (operands[3]))
+ {
+ case MULT:
+ case PLUS:
+ if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
+ {
+ rtx temp = operands[2];
+ operands[2] = operands[1];
+ operands[1] = temp;
+ }
+
+      /* Now we know operands[0] == operands[1].  */
+
+ if (MEM_P (operands[2]))
+ {
+ p = "%z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+ if (STACK_TOP_P (operands[0]))
+ /* How is it that we are storing to a dead operand[2]?
+ Well, presumably operands[1] is dead too. We can't
+ store the result to st(0) as st(0) gets popped on this
+ instruction. Instead store to operands[2] (which I
+ think has to be st(1)). st(1) will be popped later.
+ gcc <= 2.8.1 didn't have this check and generated
+ assembly code that the Unixware assembler rejected. */
+ p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+ break;
+
+ case MINUS:
+ case DIV:
+ if (MEM_P (operands[1]))
+ {
+ p = "r%z1\t%1";
+ break;
+ }
+
+ if (MEM_P (operands[2]))
+ {
+ p = "%z2\t%2";
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
+ {
+#if SYSV386_COMPAT
+ /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
+ derived assemblers, confusingly reverse the direction of
+ the operation for fsub{r} and fdiv{r} when the
+ destination register is not st(0). The Intel assembler
+ doesn't have this brain damage. Read !SYSV386_COMPAT to
+ figure out what the hardware really does. */
+ if (STACK_TOP_P (operands[0]))
+ p = "{p\t%0, %2|rp\t%2, %0}";
+ else
+ p = "{rp\t%2, %0|p\t%0, %2}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ /* As above for fmul/fadd, we can't store to st(0). */
+ p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
+ else
+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
+#endif
+ break;
+ }
+
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+#if SYSV386_COMPAT
+ if (STACK_TOP_P (operands[0]))
+ p = "{rp\t%0, %1|p\t%1, %0}";
+ else
+ p = "{p\t%1, %0|rp\t%0, %1}";
+#else
+ if (STACK_TOP_P (operands[0]))
+ p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
+ else
+ p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
+#endif
+ break;
+ }
+
+ if (STACK_TOP_P (operands[0]))
+ {
+ if (STACK_TOP_P (operands[1]))
+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
+ else
+ p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
+ break;
+ }
+ else if (STACK_TOP_P (operands[1]))
+ {
+#if SYSV386_COMPAT
+ p = "{\t%1, %0|r\t%0, %1}";
+#else
+ p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
+#endif
+ }
+ else
+ {
+#if SYSV386_COMPAT
+ p = "{r\t%2, %0|\t%0, %2}";
+#else
+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
+#endif
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ strcat (buf, p);
+ return buf;
+}
+
+/* Return needed mode for entity in optimize_mode_switching pass. */
+
+int
+ix86_mode_needed (int entity, rtx insn)
+{
+ enum attr_i387_cw mode;
+
+  /* The mode UNINITIALIZED is used to store the control word after a
+     function call or ASM pattern.  The mode ANY specifies that the
+     function has no requirements on the control word and makes no
+     changes to the bits we are interested in.  */
+
+ if (CALL_P (insn)
+ || (NONJUMP_INSN_P (insn)
+ && (asm_noperands (PATTERN (insn)) >= 0
+ || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
+ return I387_CW_UNINITIALIZED;
+
+ if (recog_memoized (insn) < 0)
+ return I387_CW_ANY;
+
+ mode = get_attr_i387_cw (insn);
+
+ switch (entity)
+ {
+ case I387_TRUNC:
+ if (mode == I387_CW_TRUNC)
+ return mode;
+ break;
+
+ case I387_FLOOR:
+ if (mode == I387_CW_FLOOR)
+ return mode;
+ break;
+
+ case I387_CEIL:
+ if (mode == I387_CW_CEIL)
+ return mode;
+ break;
+
+ case I387_MASK_PM:
+ if (mode == I387_CW_MASK_PM)
+ return mode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return I387_CW_ANY;
+}
+
+/* Output code to initialize control word copies used by trunc?f?i and
+   rounding patterns.  MODE selects the rounding or masking variant:
+   the current control word is saved to the SLOT_CW_STORED stack slot,
+   and a copy modified for MODE is written to the stack slot for MODE.  */
+
+void
+emit_i387_cw_initialization (int mode)
+{
+ rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ rtx new_mode;
+
+ enum ix86_stack_slot slot;
+
+ rtx reg = gen_reg_rtx (HImode);
+
+ emit_insn (gen_x86_fnstcw_1 (stored_mode));
+ emit_move_insn (reg, copy_rtx (stored_mode));
+
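+  /* x87 control word layout, for the constants used below: bits 10-11
+     are the rounding control field (00 = to nearest, 01 = down toward
+     -inf, 10 = up toward +inf, 11 = truncate toward zero), and bit 5
+     masks the precision exception.  */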
+ if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
+ || optimize_function_for_size_p (cfun))
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (mode)
+ {
+ case I387_CW_TRUNC:
+ /* round toward zero (truncate) */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+ slot = SLOT_CW_TRUNC;
+ break;
+
+ case I387_CW_FLOOR:
+ /* round down toward -oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
+ slot = SLOT_CW_FLOOR;
+ break;
+
+ case I387_CW_CEIL:
+ /* round up toward +oo */
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
+ slot = SLOT_CW_CEIL;
+ break;
+
+ case I387_CW_MASK_PM:
+ /* mask precision exception for nearbyint() */
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
+ slot = SLOT_CW_MASK_PM;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ gcc_assert (slot < MAX_386_STACK_LOCALS);
+
+ new_mode = assign_386_stack_local (HImode, slot);
+ emit_move_insn (new_mode, reg);
+}
+
+/* Output code for INSN to convert a float to a signed int. OPERANDS
+ are the insn operands. The output may be [HSD]Imode and the input
+ operand may be [SDX]Fmode. */
+
+const char *
+output_fix_trunc (rtx insn, rtx *operands, int fisttp)
+{
+ int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+ int dimode_p = GET_MODE (operands[0]) == DImode;
+ int round_mode = get_attr_i387_cw (insn);
+
+ /* Jump through a hoop or two for DImode, since the hardware has no
+ non-popping instruction. We used to do this a different way, but
+ that was somewhat fragile and broke with post-reload splitters. */
+ if ((dimode_p || fisttp) && !stack_top_dies)
+ output_asm_insn ("fld\t%y1", operands);
+
+ gcc_assert (STACK_TOP_P (operands[1]));
+ gcc_assert (MEM_P (operands[0]));
+ gcc_assert (GET_MODE (operands[1]) != TFmode);
+
+ if (fisttp)
+ output_asm_insn ("fisttp%z0\t%0", operands);
+ else
+ {
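+      /* Load the control word prepared by emit_i387_cw_initialization
+	 (operand 3), perform the store, then restore the original
+	 control word (operand 2).  */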
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%3", operands);
+ if (stack_top_dies || dimode_p)
+ output_asm_insn ("fistp%z0\t%0", operands);
+ else
+ output_asm_insn ("fist%z0\t%0", operands);
+ if (round_mode != I387_CW_ANY)
+ output_asm_insn ("fldcw\t%2", operands);
+ }
+
+ return "";
+}
+
+/* Output code for x87 ffreep insn. The OPNO argument, which may only
+ have the values zero or one, indicates the ffreep insn's operand
+ from the OPERANDS array. */
+
+static const char *
+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
+{
+ if (TARGET_USE_FFREEP)
+#if HAVE_AS_IX86_FFREEP
+ return opno ? "ffreep\t%y1" : "ffreep\t%y0";
+#else
+ {
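+      /* The assembler lacks the ffreep mnemonic, so emit its encoding
+	 (0xdf 0xc0+i) directly; the stack register number is patched
+	 into the .word template below.  */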
+ static char retval[] = ".word\t0xc_df";
+ int regno = REGNO (operands[opno]);
+
+ gcc_assert (FP_REGNO_P (regno));
+
+ retval[9] = '0' + (regno - FIRST_STACK_REG);
+ return retval;
+ }
+#endif
+
+ return opno ? "fstp\t%y1" : "fstp\t%y0";
+}
+
+
+/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
+ should be used. UNORDERED_P is true when fucom should be used. */
+
+const char *
+output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
+{
+ int stack_top_dies;
+ rtx cmp_op0, cmp_op1;
+ int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
+
+ if (eflags_p)
+ {
+ cmp_op0 = operands[0];
+ cmp_op1 = operands[1];
+ }
+ else
+ {
+ cmp_op0 = operands[1];
+ cmp_op1 = operands[2];
+ }
+
+ if (is_sse)
+ {
+ static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
+ static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
+ static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
+ static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
+
+ if (GET_MODE (operands[0]) == SFmode)
+ if (unordered_p)
+ return &ucomiss[TARGET_AVX ? 0 : 1];
+ else
+ return &comiss[TARGET_AVX ? 0 : 1];
+ else
+ if (unordered_p)
+ return &ucomisd[TARGET_AVX ? 0 : 1];
+ else
+ return &comisd[TARGET_AVX ? 0 : 1];
+ }
+
+ gcc_assert (STACK_TOP_P (cmp_op0));
+
+ stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+
+ if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
+ {
+ if (stack_top_dies)
+ {
+ output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
+ return output_387_ffreep (operands, 1);
+ }
+ else
+ return "ftst\n\tfnstsw\t%0";
+ }
+
+ if (STACK_REG_P (cmp_op1)
+ && stack_top_dies
+ && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
+ && REGNO (cmp_op1) != FIRST_STACK_REG)
+ {
+      /* If the top of the 387 stack dies, and the other operand is
+	 also a stack register that dies, then this must be an
+	 `fcompp' float compare.  */
+
+ if (eflags_p)
+ {
+ /* There is no double popping fcomi variant. Fortunately,
+ eflags is immune from the fstp's cc clobbering. */
+ if (unordered_p)
+ output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
+ else
+ output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
+ return output_387_ffreep (operands, 0);
+ }
+ else
+ {
+ if (unordered_p)
+ return "fucompp\n\tfnstsw\t%0";
+ else
+ return "fcompp\n\tfnstsw\t%0";
+ }
+ }
+ else
+ {
+ /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
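+      /* For example, eflags_p = 1 with an FP operand, unordered_p = 1
+	 and a dying stack top encodes as (1<<3)|(1<<1)|1 = 11,
+	 selecting "fucomip" below.  */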
+
+ static const char * const alt[16] =
+ {
+ "fcom%z2\t%y2\n\tfnstsw\t%0",
+ "fcomp%z2\t%y2\n\tfnstsw\t%0",
+ "fucom%z2\t%y2\n\tfnstsw\t%0",
+ "fucomp%z2\t%y2\n\tfnstsw\t%0",
+
+ "ficom%z2\t%y2\n\tfnstsw\t%0",
+ "ficomp%z2\t%y2\n\tfnstsw\t%0",
+ NULL,
+ NULL,
+
+ "fcomi\t{%y1, %0|%0, %y1}",
+ "fcomip\t{%y1, %0|%0, %y1}",
+ "fucomi\t{%y1, %0|%0, %y1}",
+ "fucomip\t{%y1, %0|%0, %y1}",
+
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ };
+
+ int mask;
+ const char *ret;
+
+ mask = eflags_p << 3;
+ mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
+ mask |= unordered_p << 1;
+ mask |= stack_top_dies;
+
+ gcc_assert (mask < 16);
+ ret = alt[mask];
+ gcc_assert (ret);
+
+ return ret;
+ }
+}
+
+void
+ix86_output_addr_vec_elt (FILE *file, int value)
+{
+ const char *directive = ASM_LONG;
+
+#ifdef ASM_QUAD
+ if (TARGET_64BIT)
+ directive = ASM_QUAD;
+#else
+ gcc_assert (!TARGET_64BIT);
+#endif
+
+ fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
+}
+
+void
+ix86_output_addr_diff_elt (FILE *file, int value, int rel)
+{
+ const char *directive = ASM_LONG;
+
+#ifdef ASM_QUAD
+ if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
+ directive = ASM_QUAD;
+#else
+ gcc_assert (!TARGET_64BIT);
+#endif
+ /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
+ if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+ fprintf (file, "%s%s%d-%s%d\n",
+ directive, LPREFIX, value, LPREFIX, rel);
+ else if (HAVE_AS_GOTOFF_IN_DATA)
+ fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
+#if TARGET_MACHO
+ else if (TARGET_MACHO)
+ {
+ fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
+ machopic_output_function_base_name (file);
+ fprintf(file, "\n");
+ }
+#endif
+ else
+ asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
+ ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
+}
+
+/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
+ for the target. */
+
+void
+ix86_expand_clear (rtx dest)
+{
+ rtx tmp;
+
+ /* We play register width games, which are only valid after reload. */
+ gcc_assert (reload_completed);
+
+ /* Avoid HImode and its attendant prefix byte. */
+ if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
+ dest = gen_rtx_REG (SImode, REGNO (dest));
+ tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
+
+ /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
+ if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
+ {
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
+ }
+
+ emit_insn (tmp);
+}
+
+/* X is an unchanging MEM. If it is a constant pool reference, return
+ the constant pool rtx, else NULL. */
+
+rtx
+maybe_get_pool_constant (rtx x)
+{
+ x = ix86_delegitimize_address (XEXP (x, 0));
+
+ if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+ return get_pool_constant (x);
+
+ return NULL_RTX;
+}
+
+void
+ix86_expand_move (enum machine_mode mode, rtx operands[])
+{
+ rtx op0, op1;
+ enum tls_model model;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (GET_CODE (op1) == SYMBOL_REF)
+ {
+ model = SYMBOL_REF_TLS_MODEL (op1);
+ if (model)
+ {
+ op1 = legitimize_tls_address (op1, model, true);
+ op1 = force_operand (op1, op0);
+ if (op1 == op0)
+ return;
+ }
+ else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && SYMBOL_REF_DLLIMPORT_P (op1))
+ op1 = legitimize_dllimport_symbol (op1, false);
+ }
+ else if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
+ {
+ rtx addend = XEXP (XEXP (op1, 0), 1);
+ rtx symbol = XEXP (XEXP (op1, 0), 0);
+ rtx tmp = NULL;
+
+ model = SYMBOL_REF_TLS_MODEL (symbol);
+ if (model)
+ tmp = legitimize_tls_address (symbol, model, true);
+ else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && SYMBOL_REF_DLLIMPORT_P (symbol))
+ tmp = legitimize_dllimport_symbol (symbol, true);
+
+ if (tmp)
+ {
+ tmp = force_operand (tmp, NULL);
+ tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
+ op0, 1, OPTAB_DIRECT);
+ if (tmp == op0)
+ return;
+ }
+ }
+
+ if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
+ {
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ if (MACHOPIC_PURE)
+ {
+ rtx temp = ((reload_in_progress
+ || ((op0 && REG_P (op0))
+ && mode == Pmode))
+ ? op0 : gen_reg_rtx (Pmode));
+ op1 = machopic_indirect_data_reference (op1, temp);
+ op1 = machopic_legitimize_pic_address (op1, mode,
+ temp == op1 ? 0 : temp);
+ }
+ else if (MACHOPIC_INDIRECT)
+ op1 = machopic_indirect_data_reference (op1, 0);
+ if (op0 == op1)
+ return;
+#endif
+ }
+ else
+ {
+ if (MEM_P (op0))
+ op1 = force_reg (Pmode, op1);
+ else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
+ {
+ rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
+ op1 = legitimize_pic_address (op1, reg);
+ if (op0 == op1)
+ return;
+ }
+ }
+ }
+ else
+ {
+ if (MEM_P (op0)
+ && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
+ || !push_operand (op0, mode))
+ && MEM_P (op1))
+ op1 = force_reg (mode, op1);
+
+ if (push_operand (op0, mode)
+ && ! general_no_elim_operand (op1, mode))
+ op1 = copy_to_mode_reg (mode, op1);
+
+      /* Force large constants in 64-bit compilation into a register
+	 to get them CSEed.  */
+ if (can_create_pseudo_p ()
+ && (mode == DImode) && TARGET_64BIT
+ && immediate_operand (op1, mode)
+ && !x86_64_zext_immediate_operand (op1, VOIDmode)
+ && !register_operand (op0, mode)
+ && optimize)
+ op1 = copy_to_mode_reg (mode, op1);
+
+ if (can_create_pseudo_p ()
+ && FLOAT_MODE_P (mode)
+ && GET_CODE (op1) == CONST_DOUBLE)
+ {
+ /* If we are loading a floating point constant to a register,
+ force the value to memory now, since we'll get better code
+ out the back end. */
+
+ op1 = validize_mem (force_const_mem (mode, op1));
+ if (!register_operand (op0, mode))
+ {
+ rtx temp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+ emit_move_insn (op0, temp);
+ return;
+ }
+ }
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+void
+ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
+{
+ rtx op0 = operands[0], op1 = operands[1];
+ unsigned int align = GET_MODE_ALIGNMENT (mode);
+
+  /* Force constants other than zero into memory.  We do not know how
+     the instructions used to build constants modify the upper 64 bits
+     of the register; once we have that information we may be able
+     to handle some of them more efficiently.  */
+ if (can_create_pseudo_p ()
+ && register_operand (op0, mode)
+ && (CONSTANT_P (op1)
+ || (GET_CODE (op1) == SUBREG
+ && CONSTANT_P (SUBREG_REG (op1))))
+ && standard_sse_constant_p (op1) <= 0)
+ op1 = validize_mem (force_const_mem (mode, op1));
+
+  /* We need to check memory alignment for SSE modes since attributes
+     can make operands unaligned.  */
+ if (can_create_pseudo_p ()
+ && SSE_REG_MODE_P (mode)
+ && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
+ || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
+ {
+ rtx tmp[2];
+
+ /* ix86_expand_vector_move_misalign() does not like constants ... */
+ if (CONSTANT_P (op1)
+ || (GET_CODE (op1) == SUBREG
+ && CONSTANT_P (SUBREG_REG (op1))))
+ op1 = validize_mem (force_const_mem (mode, op1));
+
+ /* ... nor both arguments in memory. */
+ if (!register_operand (op0, mode)
+ && !register_operand (op1, mode))
+ op1 = force_reg (mode, op1);
+
+ tmp[0] = op0; tmp[1] = op1;
+ ix86_expand_vector_move_misalign (mode, tmp);
+ return;
+ }
+
+ /* Make operand1 a register if it isn't already. */
+ if (can_create_pseudo_p ()
+ && !register_operand (op0, mode)
+ && !register_operand (op1, mode))
+ {
+ emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
+ return;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+/* Implement the movmisalign patterns for SSE. Non-SSE modes go
+ straight to ix86_expand_vector_move. */
+/* Code generation for scalar reg-reg moves of single and double precision data:
+ if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
+ movaps reg, reg
+ else
+ movss reg, reg
+ if (x86_sse_partial_reg_dependency == true)
+ movapd reg, reg
+ else
+ movsd reg, reg
+
+ Code generation for scalar loads of double precision data:
+ if (x86_sse_split_regs == true)
+ movlpd mem, reg (gas syntax)
+ else
+ movsd mem, reg
+
+ Code generation for unaligned packed loads of single precision data
+ (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
+ if (x86_sse_unaligned_move_optimal)
+ movups mem, reg
+
+ if (x86_sse_partial_reg_dependency == true)
+ {
+ xorps reg, reg
+ movlps mem, reg
+ movhps mem+8, reg
+ }
+ else
+ {
+ movlps mem, reg
+ movhps mem+8, reg
+ }
+
+ Code generation for unaligned packed loads of double precision data
+ (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
+ if (x86_sse_unaligned_move_optimal)
+ movupd mem, reg
+
+ if (x86_sse_split_regs == true)
+ {
+ movlpd mem, reg
+ movhpd mem+8, reg
+ }
+ else
+ {
+ movsd mem, reg
+ movhpd mem+8, reg
+ }
+ */
+
+void
+ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
+{
+ rtx op0, op1, m;
+
+ op0 = operands[0];
+ op1 = operands[1];
+
+ if (TARGET_AVX)
+ {
+ switch (GET_MODE_CLASS (mode))
+ {
+ case MODE_VECTOR_INT:
+ case MODE_INT:
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 16:
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_avx_movdqu (op0, op1));
+ break;
+ case 32:
+ op0 = gen_lowpart (V32QImode, op0);
+ op1 = gen_lowpart (V32QImode, op1);
+ emit_insn (gen_avx_movdqu256 (op0, op1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case MODE_VECTOR_FLOAT:
+ op0 = gen_lowpart (mode, op0);
+ op1 = gen_lowpart (mode, op1);
+
+ switch (mode)
+ {
+ case V4SFmode:
+ emit_insn (gen_avx_movups (op0, op1));
+ break;
+ case V8SFmode:
+ emit_insn (gen_avx_movups256 (op0, op1));
+ break;
+ case V2DFmode:
+ emit_insn (gen_avx_movupd (op0, op1));
+ break;
+ case V4DFmode:
+ emit_insn (gen_avx_movupd256 (op0, op1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+ }
+
+ if (MEM_P (op1))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_insn_for_size_p ())
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ /* ??? If we have typed data, then it would appear that using
+ movdqu is the only way to get unaligned data loaded with
+ integer type. */
+ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ rtx zero;
+
+ if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V2DFmode, op0);
+ op1 = gen_lowpart (V2DFmode, op1);
+ emit_insn (gen_sse2_movupd (op0, op1));
+ return;
+ }
+
+ /* When SSE registers are split into halves, we can avoid
+ writing to the top half twice. */
+ if (TARGET_SSE_SPLIT_REGS)
+ {
+ emit_clobber (op0);
+ zero = op0;
+ }
+ else
+ {
+ /* ??? Not sure about the best option for the Intel chips.
+ The following would seem to satisfy; the register is
+ entirely cleared, breaking the dependency chain. We
+ then store to the upper half, with a dependency depth
+ of one. A rumor has it that Intel recommends two movsd
+ followed by an unpacklpd, but this is unconfirmed. And
+ given that the dependency depth of the unpacklpd would
+ still be one, I'm not sure why this would be better. */
+ zero = CONST0_RTX (V2DFmode);
+ }
+
+ m = adjust_address (op1, DFmode, 0);
+ emit_insn (gen_sse2_loadlpd (op0, zero, m));
+ m = adjust_address (op1, DFmode, 8);
+ emit_insn (gen_sse2_loadhpd (op0, op0, m));
+ }
+ else
+ {
+ if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+ emit_move_insn (op0, CONST0_RTX (mode));
+ else
+ emit_clobber (op0);
+
+ if (mode != V4SFmode)
+ op0 = gen_lowpart (V4SFmode, op0);
+ m = adjust_address (op1, V2SFmode, 0);
+ emit_insn (gen_sse_loadlps (op0, op0, m));
+ m = adjust_address (op1, V2SFmode, 8);
+ emit_insn (gen_sse_loadhps (op0, op0, m));
+ }
+ }
+ else if (MEM_P (op0))
+ {
+ /* If we're optimizing for size, movups is the smallest. */
+ if (optimize_insn_for_size_p ())
+ {
+ op0 = gen_lowpart (V4SFmode, op0);
+ op1 = gen_lowpart (V4SFmode, op1);
+ emit_insn (gen_sse_movups (op0, op1));
+ return;
+ }
+
+      /* ??? Similar to the load case above, only less clear because
+	 of "typeless" stores.  */
+ if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
+ && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ op0 = gen_lowpart (V16QImode, op0);
+ op1 = gen_lowpart (V16QImode, op1);
+ emit_insn (gen_sse2_movdqu (op0, op1));
+ return;
+ }
+
+ if (TARGET_SSE2 && mode == V2DFmode)
+ {
+ m = adjust_address (op0, DFmode, 0);
+ emit_insn (gen_sse2_storelpd (m, op1));
+ m = adjust_address (op0, DFmode, 8);
+ emit_insn (gen_sse2_storehpd (m, op1));
+ }
+ else
+ {
+ if (mode != V4SFmode)
+ op1 = gen_lowpart (V4SFmode, op1);
+ m = adjust_address (op0, V2SFmode, 0);
+ emit_insn (gen_sse_storelps (m, op1));
+ m = adjust_address (op0, V2SFmode, 8);
+ emit_insn (gen_sse_storehps (m, op1));
+ }
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Expand a push in MODE. This is some mode for which we do not support
+ proper push instructions, at least from the registers that we expect
+ the value to live in. */
+
+void
+ix86_expand_push (enum machine_mode mode, rtx x)
+{
+ rtx tmp;
+
+ tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
+ GEN_INT (-GET_MODE_SIZE (mode)),
+ stack_pointer_rtx, 1, OPTAB_DIRECT);
+ if (tmp != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, tmp);
+
+ tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
+
+  /* When we push an operand onto the stack, it has to be aligned at
+     least at the function argument boundary.  However, since we don't
+     have the argument type, we can't determine the actual argument
+     boundary.  */
+ emit_move_insn (tmp, x);
+}
+
+/* Helper function of ix86_fixup_binary_operands to canonicalize
+ operand order. Returns true if the operands should be swapped. */
+
+static bool
+ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+
+ /* If the operation is not commutative, we can't do anything. */
+ if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
+ return false;
+
+ /* Highest priority is that src1 should match dst. */
+ if (rtx_equal_p (dst, src1))
+ return false;
+ if (rtx_equal_p (dst, src2))
+ return true;
+
+ /* Next highest priority is that immediate constants come second. */
+ if (immediate_operand (src2, mode))
+ return false;
+ if (immediate_operand (src1, mode))
+ return true;
+
+ /* Lowest priority is that memory references should come second. */
+ if (MEM_P (src2))
+ return false;
+ if (MEM_P (src1))
+ return true;
+
+ return false;
+}
+
+
+/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
+ destination to use for the operation. If different from the true
+ destination in operands[0], a copy operation will be required. */
+
+rtx
+ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+
+ /* Canonicalize operand order. */
+ if (ix86_swap_binary_operands_p (code, mode, operands))
+ {
+ rtx temp;
+
+ /* It is invalid to swap operands of different modes. */
+ gcc_assert (GET_MODE (src1) == GET_MODE (src2));
+
+ temp = src1;
+ src1 = src2;
+ src2 = temp;
+ }
+
+  /* The source operands cannot both be in memory.  */
+ if (MEM_P (src1) && MEM_P (src2))
+ {
+ /* Optimization: Only read from memory once. */
+ if (rtx_equal_p (src1, src2))
+ {
+ src2 = force_reg (mode, src2);
+ src1 = src2;
+ }
+ else
+ src2 = force_reg (mode, src2);
+ }
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ if (MEM_P (dst) && !rtx_equal_p (dst, src1))
+ dst = gen_reg_rtx (mode);
+
+ /* Source 1 cannot be a constant. */
+ if (CONSTANT_P (src1))
+ src1 = force_reg (mode, src1);
+
+ /* Source 1 cannot be a non-matching memory. */
+ if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ src1 = force_reg (mode, src1);
+
+ operands[1] = src1;
+ operands[2] = src2;
+ return dst;
+}
+
+/* Similarly, but assume that the destination has already been
+ set up properly. */
+
+void
+ix86_fixup_binary_operands_no_copy (enum rtx_code code,
+ enum machine_mode mode, rtx operands[])
+{
+ rtx dst = ix86_fixup_binary_operands (code, mode, operands);
+ gcc_assert (dst == operands[0]);
+}
+
+/* Attempt to expand a binary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which would allow 3 separate
+   memory references (one output, two input) in a single insn.  */
+
+void
+ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx src1, src2, dst, op, clob;
+
+ dst = ix86_fixup_binary_operands (code, mode, operands);
+ src1 = operands[1];
+ src2 = operands[2];
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
+ if (reload_in_progress)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. We can only do this with PLUS. */
+ gcc_assert (code == PLUS);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the binary operator meets the
+ appropriate constraints. */
+
+int
+ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
+ rtx operands[3])
+{
+ rtx dst = operands[0];
+ rtx src1 = operands[1];
+ rtx src2 = operands[2];
+
+  /* The source operands cannot both be in memory.  */
+ if (MEM_P (src1) && MEM_P (src2))
+ return 0;
+
+ /* Canonicalize operand order for commutative operators. */
+ if (ix86_swap_binary_operands_p (code, mode, operands))
+ {
+ rtx temp = src1;
+ src1 = src2;
+ src2 = temp;
+ }
+
+ /* If the destination is memory, we must have a matching source operand. */
+ if (MEM_P (dst) && !rtx_equal_p (dst, src1))
+ return 0;
+
+ /* Source 1 cannot be a constant. */
+ if (CONSTANT_P (src1))
+ return 0;
+
+ /* Source 1 cannot be a non-matching memory. */
+ if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ return 0;
+
+ return 1;
+}
+
+/* Attempt to expand a unary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which would allow 2 separate
+   memory references (one output, one input) in a single insn.  */
+
+void
+ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src, dst, op, clob;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (MEM_P (dst))
+ {
+ if (rtx_equal_p (dst, src))
+ matching_memory = 1;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+ /* When source operand is memory, destination must match. */
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
+ if (reload_in_progress || code == NOT)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. */
+ gcc_assert (code == NOT);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
+
+/* Return TRUE or FALSE depending on whether the unary operator meets the
+ appropriate constraints. */
+
+int
+ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx operands[2] ATTRIBUTE_UNUSED)
+{
+  /* If one of the operands is memory, source and destination must match.  */
+ if ((MEM_P (operands[0])
+ || MEM_P (operands[1]))
+ && ! rtx_equal_p (operands[0], operands[1]))
+ return FALSE;
+ return TRUE;
+}
+
+/* Post-reload splitter for converting an SF or DFmode value in an
+ SSE register into an unsigned SImode. */
+
+void
+ix86_split_convert_uns_si_sse (rtx operands[])
+{
+ enum machine_mode vecmode;
+ rtx value, large, zero_or_two31, input, two31, x;
+
+ large = operands[1];
+ zero_or_two31 = operands[2];
+ input = operands[3];
+ two31 = operands[4];
+ vecmode = GET_MODE (large);
+ value = gen_rtx_REG (vecmode, REGNO (operands[0]));
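+
+  /* Strategy: build a mask of elements that are >= 2^31, subtract 2^31
+     from those elements before the signed conversion, and afterwards
+     add 2^31 back by XORing the integer result with the mask shifted
+     into the sign bit.  */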
+
+ /* Load up the value into the low element. We must ensure that the other
+ elements are valid floats -- zero is the easiest such value. */
+ if (MEM_P (input))
+ {
+ if (vecmode == V4SFmode)
+ emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
+ else
+ emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
+ }
+ else
+ {
+ input = gen_rtx_REG (vecmode, REGNO (input));
+ emit_move_insn (value, CONST0_RTX (vecmode));
+ if (vecmode == V4SFmode)
+ emit_insn (gen_sse_movss (value, value, input));
+ else
+ emit_insn (gen_sse2_movsd (value, value, input));
+ }
+
+ emit_move_insn (large, two31);
+ emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
+
+ x = gen_rtx_fmt_ee (LE, vecmode, large, value);
+ emit_insn (gen_rtx_SET (VOIDmode, large, x));
+
+ x = gen_rtx_AND (vecmode, zero_or_two31, large);
+ emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
+
+ x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
+ emit_insn (gen_rtx_SET (VOIDmode, value, x));
+
+ large = gen_rtx_REG (V4SImode, REGNO (large));
+ emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
+
+ x = gen_rtx_REG (V4SImode, REGNO (value));
+ if (vecmode == V4SFmode)
+ emit_insn (gen_sse2_cvttps2dq (x, value));
+ else
+ emit_insn (gen_sse2_cvttpd2dq (x, value));
+ value = x;
+
+ emit_insn (gen_xorv4si3 (value, value, large));
+}
+
+/* Convert an unsigned DImode value into a DFmode, using only SSE.
+ Expects the 64-bit DImode to be supplied in a pair of integral
+ registers. Requires SSE2; will use SSE3 if available. For x86_32,
+ -mfpmath=sse, !optimize_size only. */
+
+void
+ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
+ rtx int_xmm, fp_xmm;
+ rtx biases, exponents;
+ rtx x;
+
+ int_xmm = gen_reg_rtx (V4SImode);
+ if (TARGET_INTER_UNIT_MOVES)
+ emit_insn (gen_movdi_to_sse (int_xmm, input));
+ else if (TARGET_SSE_SPLIT_REGS)
+ {
+ emit_clobber (int_xmm);
+ emit_move_insn (gen_lowpart (DImode, int_xmm), input);
+ }
+ else
+ {
+ x = gen_reg_rtx (V2DImode);
+ ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
+ emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
+ }
+
+ x = gen_rtx_CONST_VECTOR (V4SImode,
+ gen_rtvec (4, GEN_INT (0x43300000UL),
+ GEN_INT (0x45300000UL),
+ const0_rtx, const0_rtx));
+ exponents = validize_mem (force_const_mem (V4SImode, x));
+
+ /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
+ emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
+
+  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
+     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
+     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
+     (0x1.0p84 + double(fp_value_hi_xmm) * 0x1.0p32); the two
+     exponents differ by 32, matching the weight of the high word.  */
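+  /* Worked example: for input hi:lo = 1:5 the two doubles are
+     0x1.0p52 + 5 and 0x1.0p84 + 0x1.0p32; subtracting the biases below
+     leaves 5 and 0x1.0p32, which add up to the original value
+     0x100000005.  */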
+
+ fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
+
+ /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
+ in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
+ real_ldexp (&bias_lo_rvt, &dconst1, 52);
+ real_ldexp (&bias_hi_rvt, &dconst1, 84);
+ biases = const_double_from_real_value (bias_lo_rvt, DFmode);
+ x = const_double_from_real_value (bias_hi_rvt, DFmode);
+ biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
+ biases = validize_mem (force_const_mem (V2DFmode, biases));
+ emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
+
+ /* Add the upper and lower DFmode values together. */
+ if (TARGET_SSE3)
+ emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
+ else
+ {
+ x = copy_to_mode_reg (V2DFmode, fp_xmm);
+ emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
+ emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
+ }
+
+ ix86_expand_vector_extract (false, target, fp_xmm, 0);
+}
+
+/* Not used, but eases macroization of patterns. */
+void
+ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
+ rtx input ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+
+/* Convert an unsigned SImode value into a DFmode. Only currently used
+ for SSE, but applicable anywhere. */
+
+void
+ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE TWO31r;
+ rtx x, fp;
+
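+  /* Flip the MSB (x = input - 2^31 modulo 2^32) so the value fits in a
+     signed SImode, convert that, then add 2^31 back in DFmode, where
+     the addition is exact.  */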
+ x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
+ NULL, 1, OPTAB_DIRECT);
+
+ fp = gen_reg_rtx (DFmode);
+ emit_insn (gen_floatsidf2 (fp, x));
+
+ real_ldexp (&TWO31r, &dconst1, 31);
+ x = const_double_from_real_value (TWO31r, DFmode);
+
+ x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
+ if (x != target)
+ emit_move_insn (target, x);
+}
+
+/* Convert a signed DImode value into a DFmode. Only used for SSE in
+ 32-bit mode; otherwise we have a direct convert instruction. */
+
+void
+ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx fp_lo, fp_hi, x;
+
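+  /* Compute (double) (signed) hi * 0x1.0p32 + (double) (unsigned) lo;
+     the high word carries the sign of the full DImode value.  */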
+ fp_lo = gen_reg_rtx (DFmode);
+ fp_hi = gen_reg_rtx (DFmode);
+
+ emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+ fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
+
+ ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
+
+ x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
+ 0, OPTAB_DIRECT);
+ if (x != target)
+ emit_move_insn (target, x);
+}
+
+/* Convert an unsigned SImode value into a SFmode, using only SSE.
+ For x86_32, -mfpmath=sse, !optimize_size only. */
+void
+ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
+{
+ REAL_VALUE_TYPE ONE16r;
+ rtx fp_hi, fp_lo, int_hi, int_lo, x;
+
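+  /* Split the input into 16-bit halves, convert each one exactly to
+     SFmode, and recombine as fp_hi * 2^16 + fp_lo; the multiply is
+     exact and only the final addition rounds.  */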
+ real_ldexp (&ONE16r, &dconst1, 16);
+ x = const_double_from_real_value (ONE16r, SFmode);
+ int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
+ NULL, 0, OPTAB_DIRECT);
+ int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
+ NULL, 0, OPTAB_DIRECT);
+ fp_hi = gen_reg_rtx (SFmode);
+ fp_lo = gen_reg_rtx (SFmode);
+ emit_insn (gen_floatsisf2 (fp_hi, int_hi));
+ emit_insn (gen_floatsisf2 (fp_lo, int_lo));
+ fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
+ 0, OPTAB_DIRECT);
+ fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
+ 0, OPTAB_DIRECT);
+ if (!rtx_equal_p (target, fp_hi))
+ emit_move_insn (target, fp_hi);
+}
+
+/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
+ then replicate the value for all elements of the vector
+ register. */
+
+rtx
+ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
+{
+ rtvec v;
+ switch (mode)
+ {
+ case SImode:
+ gcc_assert (vect);
+ v = gen_rtvec (4, value, value, value, value);
+ return gen_rtx_CONST_VECTOR (V4SImode, v);
+
+ case DImode:
+ gcc_assert (vect);
+ v = gen_rtvec (2, value, value);
+ return gen_rtx_CONST_VECTOR (V2DImode, v);
+
+ case SFmode:
+ if (vect)
+ v = gen_rtvec (4, value, value, value, value);
+ else
+ v = gen_rtvec (4, value, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ return gen_rtx_CONST_VECTOR (V4SFmode, v);
+
+ case DFmode:
+ if (vect)
+ v = gen_rtvec (2, value, value);
+ else
+ v = gen_rtvec (2, value, CONST0_RTX (DFmode));
+ return gen_rtx_CONST_VECTOR (V2DFmode, v);
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
+ and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
+ for an SSE register. If VECT is true, then replicate the mask for
+ all elements of the vector register. If INVERT is true, then create
+ a mask excluding the sign bit. */
+
+rtx
+ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
+{
+ enum machine_mode vec_mode, imode;
+ HOST_WIDE_INT hi, lo;
+ int shift = 63;
+ rtx v;
+ rtx mask;
+
+ /* Find the sign bit, sign extended to 2*HWI. */
+ switch (mode)
+ {
+ case SImode:
+ case SFmode:
+ imode = SImode;
+ vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
+ lo = 0x80000000, hi = lo < 0;
+ break;
+
+ case DImode:
+ case DFmode:
+ imode = DImode;
+ vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << shift, hi = -1;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+ break;
+
+ case TImode:
+ case TFmode:
+ vec_mode = VOIDmode;
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ {
+ imode = TImode;
+ lo = 0, hi = (HOST_WIDE_INT)1 << shift;
+ }
+ else
+ {
+ rtvec vec;
+
+ imode = DImode;
+ lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
+
+ if (invert)
+ {
+ lo = ~lo, hi = ~hi;
+ v = constm1_rtx;
+ }
+ else
+ v = const0_rtx;
+
+ mask = immed_double_const (lo, hi, imode);
+
+ vec = gen_rtvec (2, v, mask);
+ v = gen_rtx_CONST_VECTOR (V2DImode, vec);
+ v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
+
+ return v;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (invert)
+ lo = ~lo, hi = ~hi;
+
+ /* Force this value into the low part of a fp vector constant. */
+ mask = immed_double_const (lo, hi, imode);
+ mask = gen_lowpart (mode, mask);
+
+ if (vec_mode == VOIDmode)
+ return force_reg (mode, mask);
+
+ v = ix86_build_const_vector (mode, vect, mask);
+ return force_reg (vec_mode, v);
+}
+
+/* Generate code for floating point ABS or NEG. */
+
+void
+ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ rtx mask, set, use, clob, dst, src;
+ bool use_sse = false;
+ bool vector_mode = VECTOR_MODE_P (mode);
+ enum machine_mode elt_mode = mode;
+
+ if (vector_mode)
+ {
+ elt_mode = GET_MODE_INNER (mode);
+ use_sse = true;
+ }
+ else if (mode == TFmode)
+ use_sse = true;
+ else if (TARGET_SSE_MATH)
+ use_sse = SSE_FLOAT_MODE_P (mode);
+
+ /* NEG and ABS performed with SSE use bitwise mask operations.
+ Create the appropriate mask now. */
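+  /* NEG is an XOR with the sign-bit mask; ABS is an AND with the
+     complement of that mask, hence the inverted mask for ABS below.  */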
+ if (use_sse)
+ mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
+ else
+ mask = NULL_RTX;
+
+ dst = operands[0];
+ src = operands[1];
+
+ if (vector_mode)
+ {
+ set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+ emit_insn (set);
+ }
+ else
+ {
+ set = gen_rtx_fmt_e (code, mode, src);
+ set = gen_rtx_SET (VOIDmode, dst, set);
+ if (mask)
+ {
+ use = gen_rtx_USE (VOIDmode, mask);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, set, use, clob)));
+ }
+ else
+ emit_insn (set);
+ }
+}
+
+/* Expand a copysign operation. Special case operand 0 being a constant. */
+
+void
+ix86_expand_copysign (rtx operands[])
+{
+ enum machine_mode mode;
+ rtx dest, op0, op1, mask, nmask;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+
+ mode = GET_MODE (dest);
+
+ if (GET_CODE (op0) == CONST_DOUBLE)
+ {
+ rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
+
+ if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
+ op0 = simplify_unary_operation (ABS, mode, op0, mode);
+
+ if (mode == SFmode || mode == DFmode)
+ {
+ enum machine_mode vmode;
+
+ vmode = mode == SFmode ? V4SFmode : V2DFmode;
+
+ if (op0 == CONST0_RTX (mode))
+ op0 = CONST0_RTX (vmode);
+ else
+ {
+ rtvec v;
+
+ if (mode == SFmode)
+ v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
+ CONST0_RTX (SFmode), CONST0_RTX (SFmode));
+ else
+ v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
+
+ op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
+ }
+ }
+ else if (op0 != CONST0_RTX (mode))
+ op0 = force_reg (mode, op0);
+
+ mask = ix86_build_signbit_mask (mode, 0, 0);
+
+ if (mode == SFmode)
+ copysign_insn = gen_copysignsf3_const;
+ else if (mode == DFmode)
+ copysign_insn = gen_copysigndf3_const;
+ else
+ copysign_insn = gen_copysigntf3_const;
+
+ emit_insn (copysign_insn (dest, op0, op1, mask));
+ }
+ else
+ {
+ rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
+
+ nmask = ix86_build_signbit_mask (mode, 0, 1);
+ mask = ix86_build_signbit_mask (mode, 0, 0);
+
+ if (mode == SFmode)
+ copysign_insn = gen_copysignsf3_var;
+ else if (mode == DFmode)
+ copysign_insn = gen_copysigndf3_var;
+ else
+ copysign_insn = gen_copysigntf3_var;
+
+ emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is known to
+ be a constant, and so has already been expanded into a vector constant. */
+
+void
+ix86_split_copysign_const (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, op0, op1, mask, x;
+
+ dest = operands[0];
+ op0 = operands[1];
+ op1 = operands[2];
+ mask = operands[3];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ dest = simplify_gen_subreg (vmode, dest, mode, 0);
+ x = gen_rtx_AND (vmode, dest, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ if (op0 != CONST0_RTX (vmode))
+ {
+ x = gen_rtx_IOR (vmode, dest, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
+ so we have to do two masks. */
+
+void
+ix86_split_copysign_var (rtx operands[])
+{
+ enum machine_mode mode, vmode;
+ rtx dest, scratch, op0, op1, mask, nmask, x;
+
+ dest = operands[0];
+ scratch = operands[1];
+ op0 = operands[2];
+ op1 = operands[3];
+ nmask = operands[4];
+ mask = operands[5];
+
+ mode = GET_MODE (dest);
+ vmode = GET_MODE (mask);
+
+ if (rtx_equal_p (op0, op1))
+ {
+ /* Shouldn't happen often (it's useless, obviously), but when it does
+ we'd generate incorrect code if we continue below. */
+ emit_move_insn (dest, op0);
+ return;
+ }
+
+ if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
+ {
+ gcc_assert (REGNO (op1) == REGNO (scratch));
+
+ x = gen_rtx_AND (vmode, scratch, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ dest = mask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_NOT (vmode, dest);
+ x = gen_rtx_AND (vmode, x, op0);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else
+ {
+ if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
+ {
+ x = gen_rtx_AND (vmode, scratch, mask);
+ }
+ else /* alternative 2,4 */
+ {
+ gcc_assert (REGNO (mask) == REGNO (scratch));
+ op1 = simplify_gen_subreg (vmode, op1, mode, 0);
+ x = gen_rtx_AND (vmode, scratch, op1);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
+
+ if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
+ {
+ dest = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, nmask);
+ }
+ else /* alternative 3,4 */
+ {
+ gcc_assert (REGNO (nmask) == REGNO (dest));
+ dest = nmask;
+ op0 = simplify_gen_subreg (vmode, op0, mode, 0);
+ x = gen_rtx_AND (vmode, dest, op0);
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+
+ x = gen_rtx_IOR (vmode, dest, scratch);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+}
+
+/* Return TRUE or FALSE depending on whether the first SET in INSN
+   has source and destination with matching CC modes and whether the
+   CC mode is at least as constrained as REQ_MODE.  */
+
+int
+ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
+{
+ rtx set;
+ enum machine_mode set_mode;
+
+ set = PATTERN (insn);
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
+
+ set_mode = GET_MODE (SET_DEST (set));
+ switch (set_mode)
+ {
+ case CCNOmode:
+ if (req_mode != CCNOmode
+ && (req_mode != CCmode
+ || XEXP (SET_SRC (set), 1) != const0_rtx))
+ return 0;
+ break;
+ case CCmode:
+ if (req_mode == CCGCmode)
+ return 0;
+ /* FALLTHRU */
+ case CCGCmode:
+ if (req_mode == CCGOCmode || req_mode == CCNOmode)
+ return 0;
+ /* FALLTHRU */
+ case CCGOCmode:
+ if (req_mode == CCZmode)
+ return 0;
+ /* FALLTHRU */
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
+ case CCZmode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return (GET_MODE (SET_SRC (set)) == set_mode);
+}
+
+/* Generate insn patterns to do an integer compare of OPERANDS. */
+
+static rtx
+ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode cmpmode;
+ rtx tmp, flags;
+
+ cmpmode = SELECT_CC_MODE (code, op0, op1);
+ flags = gen_rtx_REG (cmpmode, FLAGS_REG);
+
+ /* This is very simple, but making the interface the same as in the
+ FP case makes the rest of the code easier. */
+ tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
+}
+
+/* Figure out whether to use ordered or unordered fp comparisons.
+ Return the appropriate mode to use. */
+
+enum machine_mode
+ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+  /* ??? In order to make all comparisons reversible, we do all comparisons
+     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
+     trapping from nontrapping forms of all comparisons, we can make
+     inequality comparisons trapping again, since that results in better
+     code when using FCOM based compares.  */
+ return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
+}
+
+enum machine_mode
+ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+
+ if (SCALAR_FLOAT_MODE_P (mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
+ return ix86_fp_compare_mode (code);
+ }
+
+ switch (code)
+ {
+ /* Only zero flag is needed. */
+ case EQ: /* ZF=0 */
+ case NE: /* ZF!=0 */
+ return CCZmode;
+ /* Codes needing carry flag. */
+ case GEU: /* CF=0 */
+ case LTU: /* CF=1 */
+ /* Detect overflow checks. They need just the carry flag. */
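+ /* E.g. "(x + y) < x" as an unsigned compare is true exactly when
+ the addition wrapped, so only the carry flag is needed. */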
+ if (GET_CODE (op0) == PLUS
+ && rtx_equal_p (op1, XEXP (op0, 0)))
+ return CCCmode;
+ else
+ return CCmode;
+ case GTU: /* CF=0 & ZF=0 */
+ case LEU: /* CF=1 | ZF=1 */
+ /* Detect overflow checks. They need just the carry flag. */
+ if (GET_CODE (op0) == MINUS
+ && rtx_equal_p (op1, XEXP (op0, 0)))
+ return CCCmode;
+ else
+ return CCmode;
+ /* Codes possibly doable only with sign flag when
+ comparing against zero. */
+ case GE: /* SF=OF or SF=0 */
+ case LT: /* SF<>OF or SF=1 */
+ if (op1 == const0_rtx)
+ return CCGOCmode;
+ else
+ /* For other cases Carry flag is not required. */
+ return CCGCmode;
+ /* Codes doable only with the sign flag when comparing
+ against zero, but we miss the jump instruction for it,
+ so we need to use relational tests against the overflow
+ flag, which thus needs to be zero. */
+ case GT: /* ZF=0 & SF=OF */
+ case LE: /* ZF=1 | SF<>OF */
+ if (op1 == const0_rtx)
+ return CCNOmode;
+ else
+ return CCGCmode;
+ /* The strcmp pattern does (use flags), and combine may ask us for a
+ proper mode. */
+ case USE:
+ return CCmode;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return the fixed registers used for condition codes. */
+
+static bool
+ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+ *p1 = FLAGS_REG;
+ *p2 = FPSR_REG;
+ return true;
+}
+
+/* If two condition code modes are compatible, return a condition code
+ mode which is compatible with both. Otherwise, return
+ VOIDmode. */
+
+static enum machine_mode
+ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
+{
+ if (m1 == m2)
+ return m1;
+
+ if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
+ return VOIDmode;
+
+ if ((m1 == CCGCmode && m2 == CCGOCmode)
+ || (m1 == CCGOCmode && m2 == CCGCmode))
+ return CCGCmode;
+
+ switch (m1)
+ {
+ default:
+ gcc_unreachable ();
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
+ case CCZmode:
+ switch (m2)
+ {
+ default:
+ return VOIDmode;
+
+ case CCmode:
+ case CCGCmode:
+ case CCGOCmode:
+ case CCNOmode:
+ case CCAmode:
+ case CCCmode:
+ case CCOmode:
+ case CCSmode:
+ case CCZmode:
+ return CCmode;
+ }
+
+ case CCFPmode:
+ case CCFPUmode:
+ /* These are only compatible with themselves, which we already
+ checked above. */
+ return VOIDmode;
+ }
+}
+
+/* Split comparison code CODE into comparisons we can do using branch
+ instructions. BYPASS_CODE is the comparison code for a branch that
+ branches around FIRST_CODE and SECOND_CODE. If one of the branches
+ is not required, its code is set to UNKNOWN.
+ We never require more than two branches. */
+
+void
+ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
+ enum rtx_code *first_code,
+ enum rtx_code *second_code)
+{
+ *first_code = code;
+ *bypass_code = UNKNOWN;
+ *second_code = UNKNOWN;
+
+ /* The fcomi comparison sets flags as follows:
+
+ cmp ZF PF CF
+ > 0 0 0
+ < 0 0 1
+ = 1 0 0
+ un 1 1 1 */
+
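+ /* E.g. an IEEE-safe "a < b" cannot test CF alone, since an unordered
+ result also sets CF; it is expanded as UNLT with a bypass branch
+ taken on UNORDERED (PF=1). */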
+ switch (code)
+ {
+ case GT: /* GTU - CF=0 & ZF=0 */
+ case GE: /* GEU - CF=0 */
+ case ORDERED: /* PF=0 */
+ case UNORDERED: /* PF=1 */
+ case UNEQ: /* EQ - ZF=1 */
+ case UNLT: /* LTU - CF=1 */
+ case UNLE: /* LEU - CF=1 | ZF=1 */
+ case LTGT: /* EQ - ZF=0 */
+ break;
+ case LT: /* LTU - CF=1 - fails on unordered */
+ *first_code = UNLT;
+ *bypass_code = UNORDERED;
+ break;
+ case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
+ *first_code = UNLE;
+ *bypass_code = UNORDERED;
+ break;
+ case EQ: /* EQ - ZF=1 - fails on unordered */
+ *first_code = UNEQ;
+ *bypass_code = UNORDERED;
+ break;
+ case NE: /* NE - ZF=0 - fails on unordered */
+ *first_code = LTGT;
+ *second_code = UNORDERED;
+ break;
+ case UNGE: /* GEU - CF=0 - fails on unordered */
+ *first_code = GE;
+ *second_code = UNORDERED;
+ break;
+ case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
+ *first_code = GT;
+ *second_code = UNORDERED;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!TARGET_IEEE_FP)
+ {
+ *second_code = UNKNOWN;
+ *bypass_code = UNKNOWN;
+ }
+}
+
+/* Return the cost of a comparison done with fcom + arithmetic operations
+ on AX. All following functions use the number of instructions as a cost
+ metric. In the future this should be tweaked to compute bytes for
+ optimize_size and take into account performance of various instructions
+ on various CPUs. */
+static int
+ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
+{
+ if (!TARGET_IEEE_FP)
+ return 4;
+ /* The cost of code output by ix86_expand_fp_compare. */
+ switch (code)
+ {
+ case UNLE:
+ case UNLT:
+ case LTGT:
+ case GT:
+ case GE:
+ case UNORDERED:
+ case ORDERED:
+ case UNEQ:
+ return 4;
+ case LT:
+ case NE:
+ case EQ:
+ case UNGE:
+ return 5;
+ case LE:
+ case UNGT:
+ return 6;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return cost of comparison done using fcomi operation.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_fcomi_cost (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ /* Return an arbitrarily high cost when the instruction is not
+ supported - this prevents gcc from using it. */
+ if (!TARGET_CMOVE)
+ return 1024;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
+}
+
+/* Return cost of comparison done using sahf operation.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_sahf_cost (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ /* Return an arbitrarily high cost when the instruction is not
+ preferred - this prevents gcc from using it. */
+ if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
+ return 1024;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
+}
+
+/* Compute cost of the comparison done using any method.
+ See ix86_fp_comparison_arithmetics_cost for the metrics. */
+static int
+ix86_fp_comparison_cost (enum rtx_code code)
+{
+ int fcomi_cost, sahf_cost, arithmetics_cost;
+ int min;
+
+ fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
+ sahf_cost = ix86_fp_comparison_sahf_cost (code);
+
+ min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
+ if (min > sahf_cost)
+ min = sahf_cost;
+ if (min > fcomi_cost)
+ min = fcomi_cost;
+ return min;
+}
+
+/* Return true if we should use an FCOMI instruction for this
+ fp comparison. */
+
+int
+ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
+{
+ enum rtx_code swapped_code = swap_condition (code);
+
+ return ((ix86_fp_comparison_cost (code)
+ == ix86_fp_comparison_fcomi_cost (code))
+ || (ix86_fp_comparison_cost (swapped_code)
+ == ix86_fp_comparison_fcomi_cost (swapped_code)));
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to a fp comparison. The operands are updated in place; the new
+ comparison code is returned. */
+
+static enum rtx_code
+ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
+{
+ enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
+ rtx op0 = *pop0, op1 = *pop1;
+ enum machine_mode op_mode = GET_MODE (op0);
+ int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
+
+ /* All of the unordered compare instructions only work on registers.
+ The same is true of the fcomi compare instructions. The XFmode
+ compare instructions require registers except when comparing
+ against zero or when converting operand 1 from fixed point to
+ floating point. */
+
+ if (!is_sse
+ && (fpcmp_mode == CCFPUmode
+ || (op_mode == XFmode
+ && ! (standard_80387_constant_p (op0) == 1
+ || standard_80387_constant_p (op1) == 1)
+ && GET_CODE (op1) != FLOAT)
+ || ix86_use_fcomi_compare (code)))
+ {
+ op0 = force_reg (op_mode, op0);
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ {
+ /* %%% We only allow op1 in memory; op0 must be st(0). So swap
+ things around if they appear profitable, otherwise force op0
+ into a register. */
+
+ if (standard_80387_constant_p (op0) == 0
+ || (MEM_P (op0)
+ && ! (standard_80387_constant_p (op1) == 0
+ || MEM_P (op1))))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ if (!REG_P (op0))
+ op0 = force_reg (op_mode, op0);
+
+ if (CONSTANT_P (op1))
+ {
+ int tmp = standard_80387_constant_p (op1);
+ if (tmp == 0)
+ op1 = validize_mem (force_const_mem (op_mode, op1));
+ else if (tmp == 1)
+ {
+ if (TARGET_CMOVE)
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ op1 = force_reg (op_mode, op1);
+ }
+ }
+
+ /* Try to rearrange the comparison to make it cheaper. */
+ if (ix86_fp_comparison_cost (code)
+ > ix86_fp_comparison_cost (swap_condition (code))
+ && (REG_P (op1) || can_create_pseudo_p ()))
+ {
+ rtx tmp;
+ tmp = op0, op0 = op1, op1 = tmp;
+ code = swap_condition (code);
+ if (!REG_P (op0))
+ op0 = force_reg (op_mode, op0);
+ }
+
+ *pop0 = op0;
+ *pop1 = op1;
+ return code;
+}
+
+/* Convert comparison codes we use to represent FP comparison to integer
+ code that will result in proper branch. Return UNKNOWN if no such code
+ is available. */
+
+enum rtx_code
+ix86_fp_compare_code_to_integer (enum rtx_code code)
+{
+ switch (code)
+ {
+ case GT:
+ return GTU;
+ case GE:
+ return GEU;
+ case ORDERED:
+ case UNORDERED:
+ return code;
+ case UNEQ:
+ return EQ;
+ case UNLT:
+ return LTU;
+ case UNLE:
+ return LEU;
+ case LTGT:
+ return NE;
+ default:
+ return UNKNOWN;
+ }
+}
+
+/* Generate insn patterns to do a floating point compare of OPERANDS. */
+
+static rtx
+ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
+ rtx *second_test, rtx *bypass_test)
+{
+ enum machine_mode fpcmp_mode, intcmp_mode;
+ rtx tmp, tmp2;
+ int cost = ix86_fp_comparison_cost (code);
+ enum rtx_code bypass_code, first_code, second_code;
+
+ fpcmp_mode = ix86_fp_compare_mode (code);
+ code = ix86_prepare_fp_compare_args (code, &op0, &op1);
+
+ if (second_test)
+ *second_test = NULL_RTX;
+ if (bypass_test)
+ *bypass_test = NULL_RTX;
+
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+
+ /* Do fcomi/sahf based test when profitable. */
+ if (ix86_fp_comparison_arithmetics_cost (code) > cost
+ && (bypass_code == UNKNOWN || bypass_test)
+ && (second_code == UNKNOWN || second_test))
+ {
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
+ tmp);
+ if (TARGET_CMOVE)
+ emit_insn (tmp);
+ else
+ {
+ gcc_assert (TARGET_SAHF);
+
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
+
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
+ }
+
+ /* The FP codes work out to act like unsigned. */
+ intcmp_mode = fpcmp_mode;
+ code = first_code;
+ if (bypass_code != UNKNOWN)
+ *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+ if (second_code != UNKNOWN)
+ *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+ }
+ else
+ {
+ /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
+ tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
+ if (!scratch)
+ scratch = gen_reg_rtx (HImode);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
+
+ /* In the unordered case, we have to check C2 for NaNs, which
+ doesn't combine into anything nice. So do some bit twiddling
+ on the value we've got in AH to come up with an appropriate set
+ of condition codes. */
+
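+ /* After the fnstsw, AH holds C0, C2 and C3 in bits 0, 2 and 6, so
+ the masks below select C0 (0x01), C2 (0x04), C3 (0x40) and
+ C3|C2|C0 (0x45). */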
+ intcmp_mode = CCNOmode;
+ switch (code)
+ {
+ case GT:
+ case UNGT:
+ if (code == GT || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
+ intcmp_mode = CCmode;
+ code = GEU;
+ }
+ break;
+ case LT:
+ case UNLT:
+ if (code == LT && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
+ code = NE;
+ }
+ break;
+ case GE:
+ case UNGE:
+ if (code == GE || !TARGET_IEEE_FP)
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x01)));
+ code = NE;
+ }
+ break;
+ case LE:
+ case UNLE:
+ if (code == LE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = LTU;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
+ code = NE;
+ }
+ break;
+ case EQ:
+ case UNEQ:
+ if (code == EQ && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
+ intcmp_mode = CCmode;
+ code = EQ;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = NE;
+ }
+ break;
+ case NE:
+ case LTGT:
+ if (code == NE && TARGET_IEEE_FP)
+ {
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
+ GEN_INT (0x40)));
+ code = NE;
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
+ code = EQ;
+ }
+ break;
+
+ case UNORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = NE;
+ break;
+ case ORDERED:
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Return the test that should be put into the flags user, i.e.
+ the bcc, scc, or cmov instruction. */
+ return gen_rtx_fmt_ee (code, VOIDmode,
+ gen_rtx_REG (intcmp_mode, FLAGS_REG),
+ const0_rtx);
+}
+
+rtx
+ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
+{
+ rtx op0, op1, ret;
+ op0 = ix86_compare_op0;
+ op1 = ix86_compare_op1;
+
+ if (second_test)
+ *second_test = NULL_RTX;
+ if (bypass_test)
+ *bypass_test = NULL_RTX;
+
+ if (ix86_compare_emitted)
+ {
+ ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
+ ix86_compare_emitted = NULL_RTX;
+ }
+ else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
+ ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ second_test, bypass_test);
+ }
+ else
+ ret = ix86_expand_int_compare (code, op0, op1);
+
+ return ret;
+}
+
+/* Return true if the CODE will result in a nontrivial jump sequence. */
+bool
+ix86_fp_jump_nontrivial_p (enum rtx_code code)
+{
+ enum rtx_code bypass_code, first_code, second_code;
+ if (!TARGET_CMOVE)
+ return true;
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+ return bypass_code != UNKNOWN || second_code != UNKNOWN;
+}
+
+void
+ix86_expand_branch (enum rtx_code code, rtx label)
+{
+ rtx tmp;
+
+ /* If we have emitted a compare insn, go straight to simple.
+ ix86_expand_compare won't emit anything if ix86_compare_emitted
+ is non-NULL. */
+ if (ix86_compare_emitted)
+ goto simple;
+
+ switch (GET_MODE (ix86_compare_op0))
+ {
+ case QImode:
+ case HImode:
+ case SImode:
+ simple:
+ tmp = ix86_expand_compare (code, NULL, NULL);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ return;
+
+ case SFmode:
+ case DFmode:
+ case XFmode:
+ {
+ rtvec vec;
+ int use_fcomi;
+ enum rtx_code bypass_code, first_code, second_code;
+
+ code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
+ &ix86_compare_op1);
+
+ ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
+
+ /* Check whether we will use the natural sequence with one jump. If
+ so, we can expand jump early. Otherwise delay expansion by
+ creating compound insn to not confuse optimizers. */
+ if (bypass_code == UNKNOWN && second_code == UNKNOWN)
+ {
+ ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx, NULL_RTX, NULL_RTX);
+ }
+ else
+ {
+ tmp = gen_rtx_fmt_ee (code, VOIDmode,
+ ix86_compare_op0, ix86_compare_op1);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
+
+ use_fcomi = ix86_use_fcomi_compare (code);
+ vec = rtvec_alloc (3 + !use_fcomi);
+ RTVEC_ELT (vec, 0) = tmp;
+ RTVEC_ELT (vec, 1)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
+ RTVEC_ELT (vec, 2)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
+ if (! use_fcomi)
+ RTVEC_ELT (vec, 3)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
+
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
+ }
+ return;
+ }
+
+ case DImode:
+ if (TARGET_64BIT)
+ goto simple;
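+ /* FALLTHRU */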
+ case TImode:
+ /* Expand a DImode/TImode branch into multiple compare+branch. */
+ {
+ rtx lo[2], hi[2], label2;
+ enum rtx_code code1, code2, code3;
+ enum machine_mode submode;
+
+ if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
+ {
+ tmp = ix86_compare_op0;
+ ix86_compare_op0 = ix86_compare_op1;
+ ix86_compare_op1 = tmp;
+ code = swap_condition (code);
+ }
+ if (GET_MODE (ix86_compare_op0) == DImode)
+ {
+ split_di (&ix86_compare_op0, 1, lo+0, hi+0);
+ split_di (&ix86_compare_op1, 1, lo+1, hi+1);
+ submode = SImode;
+ }
+ else
+ {
+ split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
+ split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
+ submode = DImode;
+ }
+
+ /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
+ avoid two branches. This costs one extra insn, so disable when
+ optimizing for size. */
+
+ if ((code == EQ || code == NE)
+ && (!optimize_insn_for_size_p ()
+ || hi[1] == const0_rtx || lo[1] == const0_rtx))
+ {
+ rtx xor0, xor1;
+
+ xor1 = hi[0];
+ if (hi[1] != const0_rtx)
+ xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ xor0 = lo[0];
+ if (lo[1] != const0_rtx)
+ xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ tmp = expand_binop (submode, ior_optab, xor1, xor0,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ ix86_compare_op0 = tmp;
+ ix86_compare_op1 = const0_rtx;
+ ix86_expand_branch (code, label);
+ return;
+ }
+
+ /* Otherwise, if we are doing less-than or greater-or-equal-than,
+ op1 is a constant and the low word is zero, then we can just
+ examine the high word. Similarly for a low word of -1 and
+ less-or-equal-than or greater-than. */
+
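+ /* E.g. on a 32-bit target, "a < 0x1234567800000000LL" reduces to
+ comparing just hi(a) against 0x12345678. */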
+ if (CONST_INT_P (hi[1]))
+ switch (code)
+ {
+ case LT: case LTU: case GE: case GEU:
+ if (lo[1] == const0_rtx)
+ {
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+ ix86_expand_branch (code, label);
+ return;
+ }
+ break;
+ case LE: case LEU: case GT: case GTU:
+ if (lo[1] == constm1_rtx)
+ {
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+ ix86_expand_branch (code, label);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Otherwise, we need two or three jumps. */
+
+ label2 = gen_label_rtx ();
+
+ code1 = code;
+ code2 = swap_condition (code);
+ code3 = unsigned_condition (code);
+
+ switch (code)
+ {
+ case LT: case GT: case LTU: case GTU:
+ break;
+
+ case LE: code1 = LT; code2 = GT; break;
+ case GE: code1 = GT; code2 = LT; break;
+ case LEU: code1 = LTU; code2 = GTU; break;
+ case GEU: code1 = GTU; code2 = LTU; break;
+
+ case EQ: code1 = UNKNOWN; code2 = NE; break;
+ case NE: code2 = UNKNOWN; break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /*
+ * a < b =>
+ * if (hi(a) < hi(b)) goto true;
+ * if (hi(a) > hi(b)) goto false;
+ * if (lo(a) < lo(b)) goto true;
+ * false:
+ */
+
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+
+ if (code1 != UNKNOWN)
+ ix86_expand_branch (code1, label);
+ if (code2 != UNKNOWN)
+ ix86_expand_branch (code2, label2);
+
+ ix86_compare_op0 = lo[0];
+ ix86_compare_op1 = lo[1];
+ ix86_expand_branch (code3, label);
+
+ if (code2 != UNKNOWN)
+ emit_label (label2);
+ return;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Split branch based on floating point condition. */
+void
+ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
+ rtx target1, rtx target2, rtx tmp, rtx pushed)
+{
+ rtx second, bypass;
+ rtx label = NULL_RTX;
+ rtx condition;
+ int bypass_probability = -1, second_probability = -1, probability = -1;
+ rtx i;
+
+ if (target2 != pc_rtx)
+ {
+ rtx tmp = target2;
+ code = reverse_condition_maybe_unordered (code);
+ target2 = target1;
+ target1 = tmp;
+ }
+
+ condition = ix86_expand_fp_compare (code, op1, op2,
+ tmp, &second, &bypass);
+
+ /* Remove the pushed operand from the stack. */
+ if (pushed)
+ ix86_free_from_memory (GET_MODE (pushed));
+
+ if (split_branch_probability >= 0)
+ {
+ /* Distribute the probabilities across the jumps.
+ Assume that BYPASS and SECOND always test for UNORDERED. */
+ probability = split_branch_probability;
+
+ /* A value of 1 is low enough that the probability does not need
+ to be updated. Later we may run some experiments and see whether
+ unordered values are more frequent in practice. */
+ if (bypass)
+ bypass_probability = 1;
+ if (second)
+ second_probability = 1;
+ }
+ if (bypass != NULL_RTX)
+ {
+ label = gen_label_rtx ();
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ bypass,
+ gen_rtx_LABEL_REF (VOIDmode,
+ label),
+ pc_rtx)));
+ if (bypass_probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (bypass_probability),
+ REG_NOTES (i));
+ }
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ condition, target1, target2)));
+ if (probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (probability),
+ REG_NOTES (i));
+ if (second != NULL_RTX)
+ {
+ i = emit_jump_insn (gen_rtx_SET
+ (VOIDmode, pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
+ target2)));
+ if (second_probability >= 0)
+ REG_NOTES (i)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (second_probability),
+ REG_NOTES (i));
+ }
+ if (label != NULL_RTX)
+ emit_label (label);
+}
+
+int
+ix86_expand_setcc (enum rtx_code code, rtx dest)
+{
+ rtx ret, tmp, tmpreg, equiv;
+ rtx second_test, bypass_test;
+
+ if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
+ return 0; /* FAIL */
+
+ gcc_assert (GET_MODE (dest) == QImode);
+
+ ret = ix86_expand_compare (code, &second_test, &bypass_test);
+ PUT_MODE (ret, QImode);
+
+ tmp = dest;
+ tmpreg = dest;
+
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
+ if (bypass_test || second_test)
+ {
+ rtx test = second_test;
+ int bypass = 0;
+ rtx tmp2 = gen_reg_rtx (QImode);
+ if (bypass_test)
+ {
+ gcc_assert (!second_test);
+ test = bypass_test;
+ bypass = 1;
+ PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
+ }
+ PUT_MODE (test, QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
+
+ if (bypass)
+ emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
+ else
+ emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
+ }
+
+ /* Attach a REG_EQUAL note describing the comparison result. */
+ if (ix86_compare_op0 && ix86_compare_op1)
+ {
+ equiv = simplify_gen_relational (code, QImode,
+ GET_MODE (ix86_compare_op0),
+ ix86_compare_op0, ix86_compare_op1);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
+ }
+
+ return 1; /* DONE */
+}
+
+/* Expand a comparison setting or clearing the carry flag. Return true
+ when successful and set *POP to the comparison operation. */
+static bool
+ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
+{
+ enum machine_mode mode =
+ GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
+
+ /* Do not handle DImode compares that go through the special path. */
+ if (mode == (TARGET_64BIT ? TImode : DImode))
+ return false;
+
+ if (SCALAR_FLOAT_MODE_P (mode))
+ {
+ rtx second_test = NULL, bypass_test = NULL;
+ rtx compare_op, compare_seq;
+
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
+
+ /* Shortcut: the following common codes never translate
+ into carry flag compares. */
+ if (code == EQ || code == NE || code == UNEQ || code == LTGT
+ || code == ORDERED || code == UNORDERED)
+ return false;
+
+ /* These comparisons require the zero flag; swap operands so they don't. */
+ if ((code == GT || code == UNLE || code == LE || code == UNGT)
+ && !TARGET_IEEE_FP)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ /* Try to expand the comparison and verify that we end up with a
+ carry flag based comparison. This fails only when we decide to
+ expand the comparison using arithmetic, which is not a common
+ scenario. */
+ start_sequence ();
+ compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ if (second_test || bypass_test)
+ return false;
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
+ else
+ code = GET_CODE (compare_op);
+
+ if (code != LTU && code != GEU)
+ return false;
+
+ emit_insn (compare_seq);
+ *pop = compare_op;
+ return true;
+ }
+
+ if (!INTEGRAL_MODE_P (mode))
+ return false;
+
+ switch (code)
+ {
+ case LTU:
+ case GEU:
+ break;
+
+ /* Convert a==0 into (unsigned)a<1. */
+ case EQ:
+ case NE:
+ if (op1 != const0_rtx)
+ return false;
+ op1 = const1_rtx;
+ code = (code == EQ ? LTU : GEU);
+ break;
+
+ /* Convert a>b into b<a or a>=b+1. */
+ case GTU:
+ case LEU:
+ if (CONST_INT_P (op1))
+ {
+ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
+ /* Bail out on overflow. We could still swap the operands, but
+ that would force loading the constant into a register. */
+ if (op1 == const0_rtx
+ || !x86_64_immediate_operand (op1, GET_MODE (op1)))
+ return false;
+ code = (code == GTU ? GEU : LTU);
+ }
+ else
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ code = (code == GTU ? LTU : GEU);
+ }
+ break;
+
+ /* Convert a>=0 into (unsigned)a<0x80000000. */
+ case LT:
+ case GE:
+ if (mode == DImode || op1 != const0_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LT ? GEU : LTU);
+ break;
+ case LE:
+ case GT:
+ if (mode == DImode || op1 != constm1_rtx)
+ return false;
+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
+ code = (code == LE ? GEU : LTU);
+ break;
+
+ default:
+ return false;
+ }
+ /* Swapping operands may cause a constant to appear as the first operand. */
+ if (!nonimmediate_operand (op0, VOIDmode))
+ {
+ if (!can_create_pseudo_p ())
+ return false;
+ op0 = force_reg (mode, op0);
+ }
+ ix86_compare_op0 = op0;
+ ix86_compare_op1 = op1;
+ *pop = ix86_expand_compare (code, NULL, NULL);
+ gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
+ return true;
+}
+
+int
+ix86_expand_int_movcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]), compare_code;
+ rtx compare_seq, compare_op;
+ rtx second_test, bypass_test;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool sign_bit_compare_p = false;
+
+ start_sequence ();
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ compare_code = GET_CODE (compare_op);
+
+ if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
+ || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
+ sign_bit_compare_p = true;
+
+ /* Don't attempt mode expansion here -- if we had to expand 5 or 6
+ HImode insns, we'd be swallowed in word prefix ops. */
+
+ if ((mode != HImode || TARGET_FAST_PREFIX)
+ && (mode != (TARGET_64BIT ? TImode : DImode))
+ && CONST_INT_P (operands[2])
+ && CONST_INT_P (operands[3]))
+ {
+ rtx out = operands[0];
+ HOST_WIDE_INT ct = INTVAL (operands[2]);
+ HOST_WIDE_INT cf = INTVAL (operands[3]);
+ HOST_WIDE_INT diff;
+
+ diff = ct - cf;
+ /* Sign bit compares are better done using shifts than using sbb. */
+ if (sign_bit_compare_p
+ || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ {
+ /* Detect overlap between destination and compare sources. */
+ rtx tmp = out;
+
+ if (!sign_bit_compare_p)
+ {
+ bool fpcmp = false;
+
+ compare_code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ compare_code = ix86_fp_compare_code_to_integer (compare_code);
+ }
+
+ /* To simplify the rest of the code, restrict to the GEU case. */
+ if (compare_code == LTU)
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ else
+ {
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ diff = ct - cf;
+
+ if (reg_overlap_mentioned_p (out, ix86_compare_op0)
+ || reg_overlap_mentioned_p (out, ix86_compare_op1))
+ tmp = gen_reg_rtx (mode);
+
+ if (mode == DImode)
+ emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
+ else
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
+ }
+ else
+ {
+ if (code == GT || code == GE)
+ code = reverse_condition (code);
+ else
+ {
+ HOST_WIDE_INT tmp = ct;
+ ct = cf;
+ cf = tmp;
+ diff = ct - cf;
+ }
+ tmp = emit_store_flag (tmp, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
+
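+ /* Either way, TMP is now all-ones or zero; the arithmetic below
+ maps those two values onto CT and CF without a branch. */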
+ if (diff == 1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [addl dest, ct]
+ *
+ * Size 5 - 8.
+ */
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (cf == -1)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * orl $ct, dest
+ *
+ * Size 8.
+ */
+ tmp = expand_simple_binop (mode, IOR,
+ tmp, GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else if (diff == -1 && ct)
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * notl dest
+ * [addl dest, cf]
+ *
+ * Size 8 - 11.
+ */
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ if (cf)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (cf),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+ else
+ {
+ /*
+ * cmpl op0,op1
+ * sbbl dest,dest
+ * [notl dest]
+ * andl cf - ct, dest
+ * [addl dest, ct]
+ *
+ * Size 8 - 11.
+ */
+
+ if (cf == 0)
+ {
+ cf = ct;
+ ct = 0;
+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
+ }
+
+ tmp = expand_simple_binop (mode, AND,
+ copy_rtx (tmp),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ if (ct)
+ tmp = expand_simple_binop (mode, PLUS,
+ copy_rtx (tmp), GEN_INT (ct),
+ copy_rtx (tmp), 1, OPTAB_DIRECT);
+ }
+
+ if (!rtx_equal_p (tmp, out))
+ emit_move_insn (copy_rtx (out), copy_rtx (tmp));
+
+ return 1; /* DONE */
+ }
+
+ if (diff < 0)
+ {
+ enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
+
+ HOST_WIDE_INT tmp;
+ tmp = ct, ct = cf, cf = tmp;
+ diff = -diff;
+
+ if (SCALAR_FLOAT_MODE_P (cmp_mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
+
+ /* We may be reversing an unordered compare to a normal compare,
+ which is not valid in general (we may convert a non-trapping
+ condition to a trapping one); however, on i386 we currently
+ emit all comparisons unordered. */
+ compare_code = reverse_condition_maybe_unordered (compare_code);
+ code = reverse_condition_maybe_unordered (code);
+ }
+ else
+ {
+ compare_code = reverse_condition (compare_code);
+ code = reverse_condition (code);
+ }
+ }
+
+ compare_code = UNKNOWN;
+ if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
+ && CONST_INT_P (ix86_compare_op1))
+ {
+ if (ix86_compare_op1 == const0_rtx
+ && (code == LT || code == GE))
+ compare_code = code;
+ else if (ix86_compare_op1 == constm1_rtx)
+ {
+ if (code == LE)
+ compare_code = LT;
+ else if (code == GT)
+ compare_code = GE;
+ }
+ }
+
+ /* Optimize dest = (op0 < 0) ? -1 : cf. */
+ if (compare_code != UNKNOWN
+ && GET_MODE (ix86_compare_op0) == GET_MODE (out)
+ && (cf == -1 || ct == -1))
+ {
+ /* If the lea code below could be used, only optimize
+ if it results in a 2 insn sequence. */
+
+ if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ || (compare_code == LT && ct == -1)
+ || (compare_code == GE && cf == -1))
+ {
+ /*
+ * notl op1 (if necessary)
+ * sarl $31, op1
+ * orl cf, op1
+ */
+ if (ct != -1)
+ {
+ cf = ct;
+ ct = -1;
+ code = reverse_condition (code);
+ }
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+
+ out = expand_simple_binop (mode, IOR,
+ out, GEN_INT (cf),
+ out, 1, OPTAB_DIRECT);
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+
+ return 1; /* DONE */
+ }
+ }
+
+ if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
+ || diff == 3 || diff == 5 || diff == 9)
+ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
+ && (mode != DImode
+ || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
+ {
+ /*
+ * xorl dest,dest
+ * cmpl op1,op2
+ * setcc dest
+ * lea cf(dest*(ct-cf)),dest
+ *
+ * Size 14.
+ *
+ * This also catches the degenerate setcc-only case.
+ */
+
+ rtx tmp;
+ int nops;
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, 1);
+
+ nops = 0;
+ /* On x86_64 the lea instruction operates on Pmode, so we need
+ the arithmetic done in the proper mode to match. */
+ if (diff == 1)
+ tmp = copy_rtx (out);
+ else
+ {
+ rtx out1;
+ out1 = copy_rtx (out);
+ tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
+ nops++;
+ if (diff & 1)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, out1);
+ nops++;
+ }
+ }
+ if (cf != 0)
+ {
+ tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
+ nops++;
+ }
+ if (!rtx_equal_p (tmp, out))
+ {
+ if (nops == 1)
+ out = force_operand (tmp, copy_rtx (out));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
+ }
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+
+ /*
+ * General case: Jumpful:
+ * xorl dest,dest cmpl op1, op2
+ * cmpl op1, op2 movl ct, dest
+ * setcc dest jcc 1f
+ * decl dest movl cf, dest
+ * andl (cf-ct),dest 1:
+ * addl ct,dest
+ *
+ * Size 20. Size 14.
+ *
+ * This is reasonably steep, but branch mispredict costs are
+ * high on modern cpus, so consider failing only if optimizing
+ * for space.
+ */
+
+ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
+ {
+ if (cf == 0)
+ {
+ enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
+
+ cf = ct;
+ ct = 0;
+
+ if (SCALAR_FLOAT_MODE_P (cmp_mode))
+ {
+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
+
+ /* We may be reversing an unordered compare to a normal compare,
+ which is not valid in general (we may convert a non-trapping
+ condition to a trapping one); however, on i386 we currently
+ emit all comparisons unordered. */
+ code = reverse_condition_maybe_unordered (code);
+ }
+ else
+ {
+ code = reverse_condition (code);
+ if (compare_code != UNKNOWN)
+ compare_code = reverse_condition (compare_code);
+ }
+ }
+
+ if (compare_code != UNKNOWN)
+ {
+ /* notl op1 (if needed)
+ sarl $31, op1
+ andl (cf-ct), op1
+ addl ct, op1
+
+ For x < 0 (resp. x <= -1) there will be no notl,
+ so if possible swap the constants to get rid of the
+ complement.
+ True/false will be -1/0 while code below (store flag
+ followed by decrement) is 0/-1, so the constants need
+ to be exchanged once more. */
+
+ if (compare_code == GE || !cf)
+ {
+ code = reverse_condition (code);
+ compare_code = LT;
+ }
+ else
+ {
+ HOST_WIDE_INT tmp = cf;
+ cf = ct;
+ ct = tmp;
+ }
+
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, -1);
+ }
+ else
+ {
+ out = emit_store_flag (out, code, ix86_compare_op0,
+ ix86_compare_op1, VOIDmode, 0, 1);
+
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ }
+
+ out = expand_simple_binop (mode, AND, copy_rtx (out),
+ gen_int_mode (cf - ct, mode),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (ct)
+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
+ copy_rtx (out), 1, OPTAB_DIRECT);
+ if (!rtx_equal_p (out, operands[0]))
+ emit_move_insn (operands[0], copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+ }
+
+ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
+ {
+ /* Try a few things more with specific constants and a variable. */
+
+ optab op;
+ rtx var, orig_out, out, tmp;
+
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
+ return 0; /* FAIL */
+
+ /* If one of the two operands is an interesting constant, load that
+ constant via the constant/constant case above and mask the variable
+ in with a logical operation. */
+
+ if (CONST_INT_P (operands[2]))
+ {
+ var = operands[3];
+ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
+ operands[3] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
+ operands[3] = const0_rtx, op = ior_optab;
+ else
+ return 0; /* FAIL */
+ }
+ else if (CONST_INT_P (operands[3]))
+ {
+ var = operands[2];
+ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
+ operands[2] = constm1_rtx, op = and_optab;
+ else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
+ operands[2] = const0_rtx, op = ior_optab;
+ else
+ return 0; /* FAIL */
+ }
+ else
+ return 0; /* FAIL */
+
+ orig_out = operands[0];
+ tmp = gen_reg_rtx (mode);
+ operands[0] = tmp;
+
+ /* Recurse to get the constant loaded. */
+ if (ix86_expand_int_movcc (operands) == 0)
+ return 0; /* FAIL */
+
+ /* Mask in the interesting variable. */
+ out = expand_binop (mode, op, var, tmp, orig_out, 0,
+ OPTAB_WIDEN);
+ if (!rtx_equal_p (out, orig_out))
+ emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
+
+ return 1; /* DONE */
+ }
+
+ /*
+ * For comparison with above,
+ *
+ * movl cf,dest
+ * movl ct,tmp
+ * cmpl op1,op2
+ * cmovcc tmp,dest
+ *
+ * Size 15.
+ */
+
+ if (! nonimmediate_operand (operands[2], mode))
+ operands[2] = force_reg (mode, operands[2]);
+ if (! nonimmediate_operand (operands[3], mode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[3]);
+ operands[3] = tmp;
+ }
+ if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[2]);
+ operands[2] = tmp;
+ }
+
+ if (! register_operand (operands[2], VOIDmode)
+ && (mode == QImode
+ || ! register_operand (operands[3], VOIDmode)))
+ operands[2] = force_reg (mode, operands[2]);
+
+ if (mode == QImode
+ && ! register_operand (operands[3], VOIDmode))
+ operands[3] = force_reg (mode, operands[3]);
+
+ emit_insn (compare_seq);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode,
+ compare_op, operands[2],
+ operands[3])));
+ if (bypass_test)
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_IF_THEN_ELSE (mode,
+ bypass_test,
+ copy_rtx (operands[3]),
+ copy_rtx (operands[0]))));
+ if (second_test)
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
+ gen_rtx_IF_THEN_ELSE (mode,
+ second_test,
+ copy_rtx (operands[2]),
+ copy_rtx (operands[0]))));
+
+ return 1; /* DONE */
+}
+
+/* Swap, force into registers, or otherwise massage the two operands
+ to an sse comparison with a mask result. Thus we differ a bit from
+ ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+ The DEST operand exists to help determine whether to commute commutative
+ operators. The POP0/POP1 operands are updated in place. The new
+ comparison code is returned, or UNKNOWN if not implementable. */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+ rtx *pop0, rtx *pop1)
+{
+ rtx tmp;
+
+ switch (code)
+ {
+ case LTGT:
+ case UNEQ:
+ /* We have no LTGT as an operator. We could implement it with
+ NE & ORDERED, but this requires an extra temporary. It's
+ not clear that it's worth it. */
+ return UNKNOWN;
+
+ case LT:
+ case LE:
+ case UNGT:
+ case UNGE:
+ /* These are supported directly. */
+ break;
+
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ /* For commutative operators, try to canonicalize the destination
+ operand to be first in the comparison - this helps reload to
+ avoid extra moves. */
+ if (!dest || !rtx_equal_p (dest, *pop1))
+ break;
+ /* FALLTHRU */
+
+ case GE:
+ case GT:
+ case UNLE:
+ case UNLT:
+ /* These are not supported directly. Swap the comparison operands
+ to transform into something that is supported. */
+ tmp = *pop0;
+ *pop0 = *pop1;
+ *pop1 = tmp;
+ code = swap_condition (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+ semantics. Note that this is IEEE safe, as long as we don't
+ interchange the operands.
+
+ Returns FALSE if this conditional move doesn't match a MIN/MAX,
+ and TRUE if the operation is successful and instructions are emitted. */
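+
+/* SSE min/max instructions return their second operand when the inputs
+ are unordered, which is why the comparison operands must keep this
+ exact order. */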
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+ rtx cmp_op1, rtx if_true, rtx if_false)
+{
+ enum machine_mode mode;
+ bool is_min;
+ rtx tmp;
+
+ if (code == LT)
+ ;
+ else if (code == UNGE)
+ {
+ tmp = if_true;
+ if_true = if_false;
+ if_false = tmp;
+ }
+ else
+ return false;
+
+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+ is_min = true;
+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+ is_min = false;
+ else
+ return false;
+
+ mode = GET_MODE (dest);
+
+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+ but MODE may be a vector mode and thus not appropriate. */
+ if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+ {
+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+ rtvec v;
+
+ if_true = force_reg (mode, if_true);
+ v = gen_rtvec (2, if_true, if_false);
+ tmp = gen_rtx_UNSPEC (mode, v, u);
+ }
+ else
+ {
+ code = is_min ? SMIN : SMAX;
+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+ return true;
+}
+
+/* Expand an sse vector comparison. Return the register with the result. */
+
+static rtx
+ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+ rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx x;
+
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ if (optimize
+ || reg_overlap_mentioned_p (dest, op_true)
+ || reg_overlap_mentioned_p (dest, op_false))
+ dest = gen_reg_rtx (mode);
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+
+ return dest;
+}
+
+/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
+ operations. This is used for both scalar and vector conditional moves. */
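+/* With a mask-valued CMP this computes
+ DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE); the special cases below
+ drop terms when one arm is zero, or emit a single SSE5 conditional
+ move when available. */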
+
+static void
+ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
+{
+ enum machine_mode mode = GET_MODE (dest);
+ rtx t2, t3, x;
+
+ if (op_false == CONST0_RTX (mode))
+ {
+ op_true = force_reg (mode, op_true);
+ x = gen_rtx_AND (mode, cmp, op_true);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (op_true == CONST0_RTX (mode))
+ {
+ op_false = force_reg (mode, op_false);
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+ else if (TARGET_SSE5)
+ {
+ rtx pcmov = gen_rtx_SET (mode, dest,
+ gen_rtx_IF_THEN_ELSE (mode, cmp,
+ op_true,
+ op_false));
+ emit_insn (pcmov);
+ }
+ else
+ {
+ op_true = force_reg (mode, op_true);
+ op_false = force_reg (mode, op_false);
+
+ t2 = gen_reg_rtx (mode);
+ if (optimize)
+ t3 = gen_reg_rtx (mode);
+ else
+ t3 = dest;
+
+ x = gen_rtx_AND (mode, op_true, cmp);
+ emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+ x = gen_rtx_NOT (mode, cmp);
+ x = gen_rtx_AND (mode, x, op_false);
+ emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+ x = gen_rtx_IOR (mode, t3, t2);
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ }
+}
+
+/* Expand a floating-point conditional move. Return true if successful. */
+
+int
+ix86_expand_fp_movcc (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx tmp, compare_op, second_test, bypass_test;
+
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ {
+ enum machine_mode cmode;
+
+ /* Since we have no cmove for sse registers, don't force bad register
+ allocation just to gain access to it. Deny movcc when the
+ comparison mode doesn't match the move mode. */
+ cmode = GET_MODE (ix86_compare_op0);
+ if (cmode == VOIDmode)
+ cmode = GET_MODE (ix86_compare_op1);
+ if (cmode != mode)
+ return 0;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &ix86_compare_op0,
+ &ix86_compare_op1);
+ if (code == UNKNOWN)
+ return 0;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2],
+ operands[3]))
+ return 1;
+
+ tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
+ ix86_compare_op1, operands[2], operands[3]);
+ ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
+ return 1;
+ }
+
+ /* The floating point conditional move instructions don't directly
+ support conditions resulting from a signed integer comparison. */
+
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+
+ if (!fcmov_comparison_operator (compare_op, VOIDmode))
+ {
+ gcc_assert (!second_test && !bypass_test);
+ tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (code, tmp);
+ code = NE;
+ ix86_compare_op0 = tmp;
+ ix86_compare_op1 = const0_rtx;
+ compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
+ }
+ if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[3]);
+ operands[3] = tmp;
+ }
+ if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_move_insn (tmp, operands[2]);
+ operands[2] = tmp;
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, compare_op,
+ operands[2], operands[3])));
+ if (bypass_test)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, bypass_test,
+ operands[3], operands[0])));
+ if (second_test)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_IF_THEN_ELSE (mode, second_test,
+ operands[2], operands[0])));
+
+ return 1;
+}
+
+/* Expand a floating-point vector conditional move; a vcond operation
+ rather than a movcc operation. */
+
+bool
+ix86_expand_fp_vcond (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[3]);
+ rtx cmp;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &operands[4], &operands[5]);
+ if (code == UNKNOWN)
+ return false;
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
+ operands[5], operands[1], operands[2]))
+ return true;
+
+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
+ operands[1], operands[2]);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+}
+
+/* Expand a signed/unsigned integral vector conditional move. */
+
+bool
+ix86_expand_int_vcond (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate = false;
+ rtx x, cop0, cop1;
+
+ cop0 = operands[4];
+ cop1 = operands[5];
+
+ /* SSE5 supports all of the comparisons on all vector int types. */
+ if (!TARGET_SSE5)
+ {
+ /* Canonicalize the comparison to EQ, GT, GTU. */
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GTU:
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = cop0, cop0 = cop1, cop1 = x;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Only SSE4.1/SSE4.2 supports V2DImode. */
+ if (mode == V2DImode)
+ {
+ switch (code)
+ {
+ case EQ:
+ /* SSE4.1 supports EQ. */
+ if (!TARGET_SSE4_1)
+ return false;
+ break;
+
+ case GT:
+ case GTU:
+ /* SSE4.2 supports GT/GTU. */
+ if (!TARGET_SSE4_2)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Unsigned parallel compare is not supported by the hardware. Play some
+ tricks to turn this into a signed comparison. */
+ if (code == GTU)
+ {
+ cop0 = force_reg (mode, cop0);
+
+ switch (mode)
+ {
+ case V4SImode:
+ case V2DImode:
+ {
+ rtx t1, t2, mask;
+
+ /* a GTU b is equivalent to (a ^ msb) GT (b ^ msb) as a signed
+ compare, so flip the sign bit of both operands and use a
+ plain signed comparison. */
+ mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
+ true, false);
+
+ t1 = gen_reg_rtx (mode);
+ emit_insn ((mode == V4SImode
+ ? gen_xorv4si3
+ : gen_xorv2di3) (t1, cop0, mask));
+
+ t2 = gen_reg_rtx (mode);
+ emit_insn ((mode == V4SImode
+ ? gen_xorv4si3
+ : gen_xorv2di3) (t2, force_reg (mode, cop1), mask));
+
+ x = t1;
+ cop1 = t2;
+ code = GT;
+ }
+ break;
+
+ case V16QImode:
+ case V8HImode:
+ /* Perform a parallel unsigned saturating subtraction: (a -us b)
+ is zero iff a LEU b, so GTU becomes a negated EQ against zero. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
+
+ cop1 = CONST0_RTX (mode);
+ code = EQ;
+ negate = !negate;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ cop0 = x;
+ }
+ }
+
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+
+ ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
+ operands[2-negate]);
+ return true;
+}
+
+/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
+ true if we should do zero extension, else sign extension. HIGH_P is
+ true if we want the N/2 high elements, else the low elements. */
+
+void
+ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx, rtx);
+ rtx se, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = gen_lowpart (imode, operands[0]);
+
+ if (unsigned_p)
+ se = force_reg (imode, CONST0_RTX (imode));
+ else
+ se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
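+ /* (0 > x) yields an all-ones element exactly where x is negative,
+ i.e. the bytes that sign extension would supply. */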
+
+ emit_insn (unpack (dest, operands[1], se));
+}
+
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with SSE4.1 instructions. */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx);
+ rtx src, dest;
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = operands[0];
+ if (high_p)
+ {
+ /* Shift the higher 8 bytes into the lower 8 bytes. */
+ src = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (64)));
+ }
+ else
+ src = operands[1];
+
+ emit_insn (unpack (dest, src));
+}
+
+/* This function performs the same task as ix86_expand_sse_unpack,
+ but with sse5 instructions. */
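+/* The PPERM selector is built byte by byte: data bytes are taken from
+ the source operand, while the bytes that widen each element are filled
+ with either zero (PPERM_ZERO) or copies of the element's sign byte
+ (PPERM_SIGN). */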
+
+void
+ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ int pperm_bytes[16];
+ int i;
+ int h = (high_p) ? 8 : 0;
+ int h2;
+ int sign_extend;
+ rtvec v = rtvec_alloc (16);
+ rtvec vs;
+ rtx x, p;
+ rtx op0 = operands[0], op1 = operands[1];
+
+ switch (imode)
+ {
+ case V16QImode:
+ vs = rtvec_alloc (8);
+ h2 = (high_p) ? 8 : 0;
+ for (i = 0; i < 8; i++)
+ {
+ pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
+ pperm_bytes[2*i+1] = ((unsigned_p)
+ ? PPERM_ZERO
+ : PPERM_SIGN | PPERM_SRC2 | i | h);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ for (i = 0; i < 8; i++)
+ RTVEC_ELT (vs, i) = GEN_INT (i + h2);
+
+ p = gen_rtx_PARALLEL (VOIDmode, vs);
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ if (unsigned_p)
+ emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
+ else
+ emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
+ break;
+
+ case V8HImode:
+ vs = rtvec_alloc (4);
+ h2 = (high_p) ? 4 : 0;
+ for (i = 0; i < 4; i++)
+ {
+ sign_extend = ((unsigned_p)
+ ? PPERM_ZERO
+ : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
+ pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
+ pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
+ pperm_bytes[4*i+2] = sign_extend;
+ pperm_bytes[4*i+3] = sign_extend;
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ for (i = 0; i < 4; i++)
+ RTVEC_ELT (vs, i) = GEN_INT (i + h2);
+
+ p = gen_rtx_PARALLEL (VOIDmode, vs);
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ if (unsigned_p)
+ emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
+ else
+ emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
+ break;
+
+ case V4SImode:
+ vs = rtvec_alloc (2);
+ h2 = (high_p) ? 2 : 0;
+ for (i = 0; i < 2; i++)
+ {
+ sign_extend = ((unsigned_p)
+ ? PPERM_ZERO
+ : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
+ pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
+ pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
+ pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
+ pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
+ pperm_bytes[8*i+4] = sign_extend;
+ pperm_bytes[8*i+5] = sign_extend;
+ pperm_bytes[8*i+6] = sign_extend;
+ pperm_bytes[8*i+7] = sign_extend;
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ for (i = 0; i < 2; i++)
+ RTVEC_ELT (vs, i) = GEN_INT (i + h2);
+
+ p = gen_rtx_PARALLEL (VOIDmode, vs);
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ if (unsigned_p)
+ emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
+ else
+ emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
+/* Pack the elements of OPERANDS[1] and OPERANDS[2] into the next narrower
+ integer vector type: the low bytes of each element are kept, with
+ OPERANDS[1] supplying the low half of the result and OPERANDS[2] the
+ high half. */
+void
+ix86_expand_sse5_pack (rtx operands[3])
+{
+ enum machine_mode imode = GET_MODE (operands[0]);
+ int pperm_bytes[16];
+ int i;
+ rtvec v = rtvec_alloc (16);
+ rtx x;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+
+ switch (imode)
+ {
+ case V16QImode:
+ for (i = 0; i < 8; i++)
+ {
+ pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
+ pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
+ break;
+
+ case V8HImode:
+ for (i = 0; i < 4; i++)
+ {
+ pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
+ pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
+ pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
+ pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
+ break;
+
+ case V4SImode:
+ for (i = 0; i < 2; i++)
+ {
+ pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
+ pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
+ pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
+ pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
+ pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
+ pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
+ pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
+ pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
+
+ x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
+ emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return;
+}
+
+/* Expand conditional increment or decrement using adc/sbb instructions.
+ The default case using setcc followed by the conditional move can be
+ done by generic code. */
+int
+ix86_expand_int_addcc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx compare_op;
+ rtx val = const0_rtx;
+ bool fpcmp = false;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (operands[3] != const1_rtx
+ && operands[3] != constm1_rtx)
+ return 0;
+ if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ return 0;
+ code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+
+ if (code != LTU)
+ {
+ val = constm1_rtx;
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ PUT_MODE (compare_op, mode);
+
+ /* Construct either adc or sbb insn. */
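+ /* If the comparison was kept as LTU, VAL is 0 and the result is
+ OP2 +/- CF directly; if it was reversed, VAL is -1 and the
+ complemented carry is folded in via the -1 operand
+ (x + CF == x - (-1) - !CF). */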
+ if ((code == LTU) == (operands[3] == constm1_rtx))
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ return 1; /* DONE */
+}
+
+
+/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
+ works for floating point parameters and non-offsettable memories.
+ For pushes, it returns just stack offsets; the values will be saved
+ in the right order. At most four parts are generated. */
+
+static int
+ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
+{
+ int size;
+
+ if (!TARGET_64BIT)
+ size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
+ else
+ size = (GET_MODE_SIZE (mode) + 4) / 8;
+
+ gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
+ gcc_assert (size >= 2 && size <= 4);
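+ /* On 32-bit targets: two parts for DImode/DFmode, three for XFmode,
+ four for TFmode. On 64-bit targets everything handled here splits
+ into two DImode-sized parts. */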
+
+ /* Optimize constant pool references to immediates. This is used by fp
+ moves, which force all constants to memory to allow combining. */
+ if (MEM_P (operand) && MEM_READONLY_P (operand))
+ {
+ rtx tmp = maybe_get_pool_constant (operand);
+ if (tmp)
+ operand = tmp;
+ }
+
+ if (MEM_P (operand) && !offsettable_memref_p (operand))
+ {
+ /* The only non-offsettable memories we handle are pushes. */
+ int ok = push_operand (operand, VOIDmode);
+
+ gcc_assert (ok);
+
+ operand = copy_rtx (operand);
+ PUT_MODE (operand, Pmode);
+ parts[0] = parts[1] = parts[2] = parts[3] = operand;
+ return size;
+ }
+
+ if (GET_CODE (operand) == CONST_VECTOR)
+ {
+ enum machine_mode imode = int_mode_for_mode (mode);
+ /* Caution: if we looked through a constant pool memory above,
+ the operand may actually have a different mode now. That's
+ ok, since we want to pun this all the way back to an integer. */
+ operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
+ gcc_assert (operand != NULL);
+ mode = imode;
+ }
+
+ if (!TARGET_64BIT)
+ {
+ if (mode == DImode)
+ split_di (&operand, 1, &parts[0], &parts[1]);
+ else
+ {
+ int i;
+
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ for (i = 0; i < size; i++)
+ parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, SImode, 0);
+ parts[0] = operand;
+ for (i = 1; i < size; i++)
+ parts[i] = adjust_address (operand, SImode, 4 * i);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ switch (mode)
+ {
+ case TFmode:
+ real_to_target (l, &r, mode);
+ parts[3] = gen_int_mode (l[3], SImode);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
+ case XFmode:
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ parts[2] = gen_int_mode (l[2], SImode);
+ break;
+ case DFmode:
+ REAL_VALUE_TO_TARGET_DOUBLE (r, l);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ parts[1] = gen_int_mode (l[1], SImode);
+ parts[0] = gen_int_mode (l[0], SImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ if (mode == TImode)
+ split_ti (&operand, 1, &parts[0], &parts[1]);
+ if (mode == XFmode || mode == TFmode)
+ {
+ enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
+ if (REG_P (operand))
+ {
+ gcc_assert (reload_completed);
+ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = adjust_address (operand, DImode, 0);
+ parts[0] = operand;
+ parts[1] = adjust_address (operand, upper_mode, 8);
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[4];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ real_to_target (l, &r, mode);
+
+ /* Avoid a shift by 32, which would warn on hosts where
+ HOST_WIDE_INT is only 32 bits wide. */
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[0]
+ = gen_int_mode
+ ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
+ DImode);
+ else
+ parts[0] = immed_double_const (l[0], l[1], DImode);
+
+ if (upper_mode == SImode)
+ parts[1] = gen_int_mode (l[2], SImode);
+ else if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[1]
+ = gen_int_mode
+ ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
+ + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
+ DImode);
+ else
+ parts[1] = immed_double_const (l[2], l[3], DImode);
+ }
+ else
+ gcc_unreachable ();
+ }
+ }
+
+ return size;
+}
+
+/* Emit insns to perform a move or push of DI, DF, XF, and TF values;
+ all required insns are emitted directly. Operands 2-5 receive the
+ destination parts in the correct order; operands 6-9 receive the
+ corresponding source parts. */
+
+void
+ix86_split_long_move (rtx operands[])
+{
+ rtx part[2][4];
+ int nparts, i, j;
+ int push = 0;
+ int collisions = 0;
+ enum machine_mode mode = GET_MODE (operands[0]);
+ bool collisionparts[4];
+
+ /* The DFmode expanders may ask us to move a double.
+ For a 64-bit target this is a single move; by hiding that fact
+ here we simplify the i386.md splitters. */
+ if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
+ {
+ /* Optimize constant pool references to immediates. This is used by
+ fp moves, which force all constants to memory to allow combining. */
+
+ if (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
+ operands[1] = get_pool_constant (XEXP (operands[1], 0));
+ if (push_operand (operands[0], VOIDmode))
+ {
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], Pmode);
+ }
+ else
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
+
+ /* The only non-offsettable memory we handle is a push. */
+ if (push_operand (operands[0], VOIDmode))
+ push = 1;
+ else
+ gcc_assert (!MEM_P (operands[0])
+ || offsettable_memref_p (operands[0]));
+
+ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
+ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
+
+ /* When emitting push, take care for source operands on the stack. */
+ if (push && MEM_P (operands[1])
+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
+ for (i = 0; i < nparts - 1; i++)
+ part[1][i] = change_address (part[1][i],
+ GET_MODE (part[1][i]),
+ XEXP (part[1][i + 1], 0));
+
+ /* We need to do copy in the right order in case an address register
+ of the source overlaps the destination. */
+ if (REG_P (part[0][0]) && MEM_P (part[1][0]))
+ {
+ rtx tmp;
+
+ for (i = 0; i < nparts; i++)
+ {
+ collisionparts[i]
+ = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
+ if (collisionparts[i])
+ collisions++;
+ }
+
+ /* Collision in the middle part can be handled by reordering. */
+ if (collisions == 1 && nparts == 3 && collisionparts [1])
+ {
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+ else if (collisions == 1
+ && nparts == 4
+ && (collisionparts [1] || collisionparts [2]))
+ {
+ if (collisionparts [1])
+ {
+ tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
+ tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
+ }
+ else
+ {
+ tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
+ tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
+ }
+ }
+
+ /* If there are more collisions, we can't handle it by reordering.
+ Do an lea to the last part and use only one colliding move. */
+ else if (collisions > 1)
+ {
+ rtx base;
+
+ collisions = 1;
+
+ base = part[0][nparts - 1];
+
+ /* Handle the case when the last part isn't valid for lea.
+ This happens in 64-bit mode when storing the 12-byte XFmode. */
+ if (GET_MODE (base) != Pmode)
+ base = gen_rtx_REG (Pmode, REGNO (base));
+
+ emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+ part[1][0] = replace_equiv_address (part[1][0], base);
+ for (i = 1; i < nparts; i++)
+ {
+ tmp = plus_constant (base, UNITS_PER_WORD * i);
+ part[1][i] = replace_equiv_address (part[1][i], tmp);
+ }
+ }
+ }
+
+ if (push)
+ {
+ if (!TARGET_64BIT)
+ {
+ if (nparts == 3)
+ {
+ if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
+ emit_move_insn (part[0][2], part[1][2]);
+ }
+ else if (nparts == 4)
+ {
+ emit_move_insn (part[0][3], part[1][3]);
+ emit_move_insn (part[0][2], part[1][2]);
+ }
+ }
+ else
+ {
+ /* In 64-bit mode we don't have a 32-bit push available. If this is a
+ register, that is OK - we just use the larger counterpart. We also
+ retype memory - the SImode memories come from an attempt to avoid a
+ REX prefix when moving the second half of a TFmode value. */
+ if (GET_MODE (part[1][1]) == SImode)
+ {
+ switch (GET_CODE (part[1][1]))
+ {
+ case MEM:
+ part[1][1] = adjust_address (part[1][1], DImode, 0);
+ break;
+
+ case REG:
+ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (GET_MODE (part[1][0]) == SImode)
+ part[1][0] = part[1][1];
+ }
+ }
+ emit_move_insn (part[0][1], part[1][1]);
+ emit_move_insn (part[0][0], part[1][0]);
+ return;
+ }
+
+ /* Choose the correct order so as not to overwrite the source before it is copied. */
+ if ((REG_P (part[0][0])
+ && REG_P (part[1][1])
+ && (REGNO (part[0][0]) == REGNO (part[1][1])
+ || (nparts == 3
+ && REGNO (part[0][0]) == REGNO (part[1][2]))
+ || (nparts == 4
+ && REGNO (part[0][0]) == REGNO (part[1][3]))))
+ || (collisions > 0
+ && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
+ {
+ for (i = 0, j = nparts - 1; i < nparts; i++, j--)
+ {
+ operands[2 + i] = part[0][j];
+ operands[6 + i] = part[1][j];
+ }
+ }
+ else
+ {
+ for (i = 0; i < nparts; i++)
+ {
+ operands[2 + i] = part[0][i];
+ operands[6 + i] = part[1][i];
+ }
+ }
+
+ /* If optimizing for size, attempt to locally unCSE nonzero constants. */
+ if (optimize_insn_for_size_p ())
+ {
+ for (j = 0; j < nparts - 1; j++)
+ if (CONST_INT_P (operands[6 + j])
+ && operands[6 + j] != const0_rtx
+ && REG_P (operands[2 + j]))
+ for (i = j; i < nparts - 1; i++)
+ if (CONST_INT_P (operands[7 + i])
+ && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
+ operands[7 + i] = operands[2 + j];
+ }
+
+ for (i = 0; i < nparts; i++)
+ emit_move_insn (operands[2 + i], operands[6 + i]);
+
+ return;
+}
+
+/* Helper function of ix86_split_ashl used to generate an SImode/DImode
+ left shift by a constant, either using a single shift or
+ a sequence of add instructions. */
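+/* For instance, when not optimizing for size, a left shift by 2 becomes
+ two self-adds whenever 2 * ix86_cost->add <= ix86_cost->shift_const. */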
+
+static void
+ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
+{
+ if (count == 1)
+ {
+ emit_insn ((mode == DImode
+ ? gen_addsi3
+ : gen_adddi3) (operand, operand, operand));
+ }
+ else if (!optimize_insn_for_size_p ()
+ && count * ix86_cost->add <= ix86_cost->shift_const)
+ {
+ int i;
+ for (i=0; i<count; i++)
+ {
+ emit_insn ((mode == DImode
+ ? gen_addsi3
+ : gen_adddi3) (operand, operand, operand));
+ }
+ }
+ else
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (operand, operand, GEN_INT (count)));
+}
+
+void
+ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (CONST_INT_P (operands[2]))
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
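+ /* low[0]/high[0] are the halves of the destination; low[1]/high[1]
+ are the halves of the source. */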
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count >= single_width)
+ {
+ emit_move_insn (high[0], low[1]);
+ emit_move_insn (low[0], const0_rtx);
+
+ if (count > single_width)
+ ix86_expand_ashl_const (high[0], count - single_width, mode);
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shld
+ : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
+ ix86_expand_ashl_const (low[0], count, mode);
+ }
+ return;
+ }
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ if (operands[1] == const1_rtx)
+ {
+ /* Assuming we've chosen QImode-capable registers, 1 << N
+ can be done with two 32/64-bit shifts, no branches, no cmoves. */
+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
+ {
+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+
+ ix86_expand_clear (low[0]);
+ ix86_expand_clear (high[0]);
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
+
+ d = gen_lowpart (QImode, low[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_EQ (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+
+ d = gen_lowpart (QImode, high[0]);
+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
+ s = gen_rtx_NE (QImode, flags, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, d, s));
+ }
+
+ /* Otherwise, we can get the same results by manually performing
+ a bit extract operation on bit 5/6, and then performing the two
+ shifts. The two methods of getting 0/1 into low/high are exactly
+ the same size. Avoiding the shift in the bit extract case helps
+ pentium4 a bit; no one else seems to care much either way. */
+ else
+ {
+ rtx x;
+
+ if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
+ x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
+ else
+ x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
+ emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
+
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
+ emit_insn ((mode == DImode
+ ? gen_andsi3
+ : gen_anddi3) (high[0], high[0], GEN_INT (1)));
+ emit_move_insn (low[0], high[0]);
+ emit_insn ((mode == DImode
+ ? gen_xorsi3
+ : gen_xordi3) (low[0], low[0], GEN_INT (1)));
+ }
+
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (low[0], low[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (high[0], high[0], operands[2]));
+ return;
+ }
+
+ if (operands[1] == constm1_rtx)
+ {
+ /* For -1 << N, we can avoid the shld instruction, because we
+ know that we're shifting 0...31/63 ones into a -1. */
+ emit_move_insn (low[0], constm1_rtx);
+ if (optimize_insn_for_size_p ())
+ emit_move_insn (high[0], low[0]);
+ else
+ emit_move_insn (high[0], constm1_rtx);
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+ emit_insn ((mode == DImode
+ ? gen_x86_shld
+ : gen_x86_64_shld) (high[0], low[0], operands[2]));
+ }
+
+ emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_2
+ : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
+}
+
+void
+ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (CONST_INT_P (operands[2]))
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count == single_width * 2 - 1)
+ {
+ emit_move_insn (high[0], high[1]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0],
+ GEN_INT (single_width - 1)));
+ emit_move_insn (low[0], high[0]);
+
+ }
+ else if (count >= single_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ emit_move_insn (high[0], low[0]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0],
+ GEN_INT (single_width - 1)));
+ if (count > single_width)
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (low[0], low[0],
+ GEN_INT (count - single_width)));
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd
+ : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd
+ : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (high[0], high[0], operands[2]));
+
+ if (TARGET_CMOVE && scratch)
+ {
+ emit_move_insn (scratch, high[0]);
+ emit_insn ((mode == DImode
+ ? gen_ashrsi3
+ : gen_ashrdi3) (scratch, scratch,
+ GEN_INT (single_width - 1)));
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_3
+ : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
+ }
+}
+
+void
+ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
+{
+ rtx low[2], high[2];
+ int count;
+ const int single_width = mode == DImode ? 32 : 64;
+
+ if (CONST_INT_P (operands[2]))
+ {
+ (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
+ count = INTVAL (operands[2]) & (single_width * 2 - 1);
+
+ if (count >= single_width)
+ {
+ emit_move_insn (low[0], high[1]);
+ ix86_expand_clear (high[0]);
+
+ if (count > single_width)
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (low[0], low[0],
+ GEN_INT (count - single_width)));
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd
+ : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
+ }
+ }
+ else
+ {
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+
+ (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
+
+ emit_insn ((mode == DImode
+ ? gen_x86_shrd
+ : gen_x86_64_shrd) (low[0], high[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_lshrsi3
+ : gen_lshrdi3) (high[0], high[0], operands[2]));
+
+ /* Heh. By reversing the arguments, we can reuse this pattern. */
+ if (TARGET_CMOVE && scratch)
+ {
+ ix86_expand_clear (scratch);
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_1
+ : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
+ scratch));
+ }
+ else
+ emit_insn ((mode == DImode
+ ? gen_x86_shift_adj_2
+ : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
+ }
+}
+
+/* Predict that the just-emitted jump instruction is taken with
+ probability PROB, measured relative to REG_BR_PROB_BASE. */
+static void
+predict_jump (int prob)
+{
+ rtx insn = get_last_insn ();
+ gcc_assert (JUMP_P (insn));
+ REG_NOTES (insn)
+ = gen_rtx_EXPR_LIST (REG_BR_PROB,
+ GEN_INT (prob),
+ REG_NOTES (insn));
+}
+
+/* Helper function for the string operations below. Test whether VARIABLE
+ is aligned to VALUE bytes; if so, jump to the returned label. */
+static rtx
+ix86_expand_aligntest (rtx variable, int value, bool epilogue)
+{
+ rtx label = gen_label_rtx ();
+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
+ if (GET_MODE (variable) == DImode)
+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
+ else
+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
+ 1, label);
+ if (epilogue)
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ return label;
+}
+
+/* Decrease COUNTREG by VALUE. */
+static void
+ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
+{
+ if (GET_MODE (countreg) == DImode)
+ emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
+ else
+ emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
+}
+
+/* Zero-extend the possibly-SImode EXP into a Pmode register. */
+rtx
+ix86_zero_extend_to_Pmode (rtx exp)
+{
+ rtx r;
+ if (GET_MODE (exp) == VOIDmode)
+ return force_reg (Pmode, exp);
+ if (GET_MODE (exp) == Pmode)
+ return copy_to_mode_reg (Pmode, exp);
+ r = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendsidi2 (r, exp));
+ return r;
+}
+
+/* Divide COUNTREG by SCALE, which must be a power of two. */
+static rtx
+scale_counter (rtx countreg, int scale)
+{
+ rtx sc;
+ rtx piece_size_mask;
+
+ if (scale == 1)
+ return countreg;
+ if (CONST_INT_P (countreg))
+ return GEN_INT (INTVAL (countreg) / scale);
+ gcc_assert (REG_P (countreg));
+
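+ /* SCALE is a power of two here, so the division is a logical shift
+ right by log2 (SCALE). */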
+ piece_size_mask = GEN_INT (scale - 1);
+ sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
+ GEN_INT (exact_log2 (scale)),
+ NULL, 1, OPTAB_DIRECT);
+ return sc;
+}
+
+/* Return mode for the memcpy/memset loop counter. Prefer SImode over
+ DImode for constant loop counts. */
+
+static enum machine_mode
+counter_mode (rtx count_exp)
+{
+ if (GET_MODE (count_exp) != VOIDmode)
+ return GET_MODE (count_exp);
+ if (GET_CODE (count_exp) != CONST_INT)
+ return Pmode;
+ if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
+ return DImode;
+ return SImode;
+}
+
+/* When SRCPTR is non-NULL, output a simple loop that moves memory from
+ SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
+ size is COUNT, specified in bytes. When SRCPTR is NULL, output the
+ equivalent loop that sets memory to VALUE (assumed to be in MODE).
+
+ The size is rounded down to a whole number of chunks moved at once.
+ SRCMEM and DESTMEM provide the MEM rtxes used to supply proper aliasing
+ info. */
+
+
+static void
+expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, rtx value,
+ rtx count, enum machine_mode mode, int unroll,
+ int expected_size)
+{
+ rtx out_label, top_label, iter, tmp;
+ enum machine_mode iter_mode = counter_mode (count);
+ rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
+ rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
+ rtx size;
+ rtx x_addr;
+ rtx y_addr;
+ int i;
+
+ top_label = gen_label_rtx ();
+ out_label = gen_label_rtx ();
+ iter = gen_reg_rtx (iter_mode);
+
+ size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
+ NULL, 1, OPTAB_DIRECT);
+ /* The AND above and the compare below should combine into a single
+ insn. */
+ if (piece_size == const1_rtx)
+ {
+ emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
+ true, out_label);
+ predict_jump (REG_BR_PROB_BASE * 10 / 100);
+ }
+ emit_move_insn (iter, const0_rtx);
+
+ emit_label (top_label);
+
+ tmp = convert_modes (Pmode, iter_mode, iter, true);
+ x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
+ destmem = change_address (destmem, mode, x_addr);
+
+ if (srcmem)
+ {
+ y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
+ srcmem = change_address (srcmem, mode, y_addr);
+
+ /* When unrolling for chips that reorder memory reads and writes,
+ we can save registers by using a single temporary.
+ Also, using 4 temporaries is overkill in 32-bit mode (note that this
+ path is currently disabled by the "&& 0" below). */
+ if (!TARGET_64BIT && 0)
+ {
+ for (i = 0; i < unroll; i++)
+ {
+ if (i)
+ {
+ destmem =
+ adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
+ srcmem =
+ adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (destmem, srcmem);
+ }
+ }
+ else
+ {
+ rtx tmpreg[4];
+ gcc_assert (unroll <= 4);
+ for (i = 0; i < unroll; i++)
+ {
+ tmpreg[i] = gen_reg_rtx (mode);
+ if (i)
+ {
+ srcmem =
+ adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (tmpreg[i], srcmem);
+ }
+ for (i = 0; i < unroll; i++)
+ {
+ if (i)
+ {
+ destmem =
+ adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
+ }
+ emit_move_insn (destmem, tmpreg[i]);
+ }
+ }
+ }
+ else
+ for (i = 0; i < unroll; i++)
+ {
+ if (i)
+ destmem =
+ adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
+ emit_move_insn (destmem, value);
+ }
+
+ tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != iter)
+ emit_move_insn (iter, tmp);
+
+ emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
+ true, top_label);
+ if (expected_size != -1)
+ {
+ expected_size /= GET_MODE_SIZE (mode) * unroll;
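+ /* EXPECTED_SIZE is now the expected iteration count N; the loop back
+ edge is taken roughly (N - 1) / N of the time, so scale
+ REG_BR_PROB_BASE accordingly (the N / 2 term rounds to nearest). */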
+ if (expected_size == 0)
+ predict_jump (0);
+ else if (expected_size > REG_BR_PROB_BASE)
+ predict_jump (REG_BR_PROB_BASE - 1);
+ else
+ predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
+ }
+ else
+ predict_jump (REG_BR_PROB_BASE * 80 / 100);
+ iter = ix86_zero_extend_to_Pmode (iter);
+ tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != destptr)
+ emit_move_insn (destptr, tmp);
+ if (srcptr)
+ {
+ tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != srcptr)
+ emit_move_insn (srcptr, tmp);
+ }
+ emit_label (out_label);
+}
+
+/* Output a "rep; mov" instruction.
+ The arguments have the same meaning as for the previous function. */
+static void
+expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr,
+ rtx count,
+ enum machine_mode mode)
+{
+ rtx destexp;
+ rtx srcexp;
+ rtx countreg;
+
+ /* If the size is known and divisible by 4, the wider "rep movs" is
+ shorter. */
+ if (mode == QImode && CONST_INT_P (count)
+ && !(INTVAL (count) & 3))
+ mode = SImode;
+
+ if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
+ destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
+ if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
+ srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
+ countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
+ if (mode != QImode)
+ {
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
+ destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
+ srcexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
+ srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
+ }
+ else
+ {
+ destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
+ srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
+ }
+ if (CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ srcmem = shallow_copy_rtx (srcmem);
+ set_mem_size (destmem, count);
+ set_mem_size (srcmem, count);
+ }
+ else
+ {
+ if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
+ if (MEM_SIZE (srcmem))
+ set_mem_size (srcmem, NULL_RTX);
+ }
+ emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
+ destexp, srcexp));
+}
+
+/* Output a "rep; stos" instruction.
+ The arguments have the same meaning as for the previous function. */
+static void
+expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
+ rtx count, enum machine_mode mode,
+ rtx orig_value)
+{
+ rtx destexp;
+ rtx countreg;
+
+ if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
+ destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
+ value = force_reg (mode, gen_lowpart (mode, value));
+ countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
+ if (mode != QImode)
+ {
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
+ destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
+ }
+ else
+ destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
+ if (orig_value == const0_rtx && CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ set_mem_size (destmem, count);
+ }
+ else if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
+ emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
+}
+
+static void
+emit_strmov (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
+{
+ rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
+ rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+}
+
+/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
+static void
+expand_movmem_epilogue (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, rtx count, int max_size)
+{
+ rtx src, dest;
+ if (CONST_INT_P (count))
+ {
+ HOST_WIDE_INT countval = INTVAL (count);
+ int offset = 0;
+
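+ /* Emit one straight-line move for each set bit of COUNTVAL, from the
+ widest piece allowed by MAX_SIZE down to a single byte. */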
+ if ((countval & 0x10) && max_size > 16)
+ {
+ if (TARGET_64BIT)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
+ emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
+ }
+ else
+ gcc_unreachable ();
+ offset += 16;
+ }
+ if ((countval & 0x08) && max_size > 8)
+ {
+ if (TARGET_64BIT)
+ emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
+ else
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
+ emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
+ }
+ offset += 8;
+ }
+ if ((countval & 0x04) && max_size > 4)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
+ offset += 4;
+ }
+ if ((countval & 0x02) && max_size > 2)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
+ offset += 2;
+ }
+ if ((countval & 0x01) && max_size > 1)
+ {
+ emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
+ offset += 1;
+ }
+ return;
+ }
+ if (max_size > 8)
+ {
+ count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
+ count, 1, OPTAB_DIRECT);
+ expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
+ count, QImode, 1, 4);
+ return;
+ }
+
+ /* With single-insn stringops we can cheaply advance the dest and src
+ pointers. Otherwise we save code size by maintaining an offset
+ (zero is readily available from the preceding rep operation) and
+ using x86 addressing modes. */
+ if (TARGET_SINGLE_STRINGOP)
+ {
+ if (max_size > 4)
+ {
+ rtx label = ix86_expand_aligntest (count, 4, true);
+ src = change_address (srcmem, SImode, srcptr);
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 2)
+ {
+ rtx label = ix86_expand_aligntest (count, 2, true);
+ src = change_address (srcmem, HImode, srcptr);
+ dest = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 1)
+ {
+ rtx label = ix86_expand_aligntest (count, 1, true);
+ src = change_address (srcmem, QImode, srcptr);
+ dest = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strmov (destptr, dest, srcptr, src));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+ else
+ {
+ rtx offset = force_reg (Pmode, const0_rtx);
+ rtx tmp;
+
+ if (max_size > 4)
+ {
+ rtx label = ix86_expand_aligntest (count, 4, true);
+ src = change_address (srcmem, SImode, srcptr);
+ dest = change_address (destmem, SImode, destptr);
+ emit_move_insn (dest, src);
+ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != offset)
+ emit_move_insn (offset, tmp);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 2)
+ {
+ rtx label = ix86_expand_aligntest (count, 2, true);
+ tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
+ src = change_address (srcmem, HImode, tmp);
+ tmp = gen_rtx_PLUS (Pmode, destptr, offset);
+ dest = change_address (destmem, HImode, tmp);
+ emit_move_insn (dest, src);
+ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
+ true, OPTAB_LIB_WIDEN);
+ if (tmp != offset)
+ emit_move_insn (offset, tmp);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 1)
+ {
+ rtx label = ix86_expand_aligntest (count, 1, true);
+ tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
+ src = change_address (srcmem, QImode, tmp);
+ tmp = gen_rtx_PLUS (Pmode, destptr, offset);
+ dest = change_address (destmem, QImode, tmp);
+ emit_move_insn (dest, src);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+}
+
+/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
+static void
+expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
+ rtx count, int max_size)
+{
+ count =
+ expand_simple_binop (counter_mode (count), AND, count,
+ GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
+ expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
+ gen_lowpart (QImode, value), count, QImode,
+ 1, max_size / 2);
+}
+
+/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
+static void
+expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
+{
+ rtx dest;
+
+ if (CONST_INT_P (count))
+ {
+ HOST_WIDE_INT countval = INTVAL (count);
+ int offset = 0;
+
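+ /* As in expand_movmem_epilogue: one store for each set bit of
+ COUNTVAL, widest piece first. */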
+ if ((countval & 0x10) && max_size > 16)
+ {
+ if (TARGET_64BIT)
+ {
+ dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ gcc_unreachable ();
+ offset += 16;
+ }
+ if ((countval & 0x08) && max_size > 8)
+ {
+ if (TARGET_64BIT)
+ {
+ dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ {
+ dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, value));
+ dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ offset += 8;
+ }
+ if ((countval & 0x04) && max_size > 4)
+ {
+ dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
+ offset += 4;
+ }
+ if ((countval & 0x02) && max_size > 2)
+ {
+ dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
+ offset += 2;
+ }
+ if ((countval & 0x01) && max_size > 1)
+ {
+ dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
+ offset += 1;
+ }
+ return;
+ }
+ if (max_size > 32)
+ {
+ expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
+ return;
+ }
+ if (max_size > 16)
+ {
+ rtx label = ix86_expand_aligntest (count, 16, true);
+ if (TARGET_64BIT)
+ {
+ dest = change_address (destmem, DImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ {
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 8)
+ {
+ rtx label = ix86_expand_aligntest (count, 8, true);
+ if (TARGET_64BIT)
+ {
+ dest = change_address (destmem, DImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ else
+ {
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, dest, value));
+ emit_insn (gen_strset (destptr, dest, value));
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 4)
+ {
+ rtx label = ix86_expand_aligntest (count, 4, true);
+ dest = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 2)
+ {
+ rtx label = ix86_expand_aligntest (count, 2, true);
+ dest = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (max_size > 1)
+ {
+ rtx label = ix86_expand_aligntest (count, 1, true);
+ dest = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+}
+
+/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
+ by ALIGN, to DESIRED_ALIGNMENT. */
+static void
+expand_movmem_prologue (rtx destmem, rtx srcmem,
+ rtx destptr, rtx srcptr, rtx count,
+ int align, int desired_alignment)
+{
+ if (align <= 1 && desired_alignment > 1)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 1, false);
+ srcmem = change_address (srcmem, QImode, srcptr);
+ destmem = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
+ ix86_adjust_counter (count, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2 && desired_alignment > 2)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 2, false);
+ srcmem = change_address (srcmem, HImode, srcptr);
+ destmem = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
+ ix86_adjust_counter (count, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 4, false);
+ srcmem = change_address (srcmem, SImode, srcptr);
+ destmem = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
+ ix86_adjust_counter (count, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ gcc_assert (desired_alignment <= 8);
+}
+
+/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
+ ALIGN_BYTES is how many bytes need to be copied. */
+static rtx
+expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
+ int desired_align, int align_bytes)
+{
+ rtx src = *srcp;
+ rtx src_size, dst_size;
+ int off = 0;
+ int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ src_align_bytes = desired_align - src_align_bytes;
+ src_size = MEM_SIZE (src);
+ dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
+ off = 1;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ src = adjust_automodify_address_nv (src, HImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0
+ && (src_align_bytes & 1) == (align_bytes & 1)
+ && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
+ set_mem_align (src, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ src = adjust_automodify_address_nv (src, SImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ off = 4;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 7) == (align_bytes & 7))
+ src_align = 8;
+ else if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (src_align > (unsigned int) desired_align)
+ src_align = desired_align;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ if (src_size)
+ set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
+ *srcp = src;
+ return dst;
+}
+
+/* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
+ to DESIRED_ALIGNMENT. */
+static void
+expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
+ int align, int desired_alignment)
+{
+ if (align <= 1 && desired_alignment > 1)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 1, false);
+ destmem = change_address (destmem, QImode, destptr);
+ emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
+ ix86_adjust_counter (count, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2 && desired_alignment > 2)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 2, false);
+ destmem = change_address (destmem, HImode, destptr);
+ emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
+ ix86_adjust_counter (count, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && desired_alignment > 4)
+ {
+ rtx label = ix86_expand_aligntest (destptr, 4, false);
+ destmem = change_address (destmem, SImode, destptr);
+ emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
+ ix86_adjust_counter (count, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ gcc_assert (desired_alignment <= 8);
+}
+
+/* Store enough bytes at DST to align DST, known to be aligned by ALIGN, to
+ DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
+static rtx
+expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
+ int desired_align, int align_bytes)
+{
+ int off = 0;
+ rtx dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ off = 1;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (QImode, value)));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (HImode, value)));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ off = 4;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (SImode, value)));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ return dst;
+}
+
+/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
+static enum stringop_alg
+decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
+ int *dynamic_check)
+{
+ const struct stringop_algs * algs;
+ bool optimize_for_speed;
+ /* Algorithms using the rep prefix want at least edi and ecx;
+ additionally, memset wants eax and memcpy wants esi. Don't
+ consider such algorithms if the user has appropriated those
+ registers for their own purposes. */
+ bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
+ || (memset
+ ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
+
+#define ALG_USABLE_P(alg) (rep_prefix_usable \
+ || (alg != rep_prefix_1_byte \
+ && alg != rep_prefix_4_byte \
+ && alg != rep_prefix_8_byte))
+ const struct processor_costs *cost;
+
+ /* Even if the string operation call is cold, we still might spend a lot
+ of time processing large blocks. */
+ if (optimize_function_for_size_p (cfun)
+ || (optimize_insn_for_size_p ()
+ && expected_size != -1 && expected_size < 256))
+ optimize_for_speed = false;
+ else
+ optimize_for_speed = true;
+
+ cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
+
+ *dynamic_check = -1;
+ if (memset)
+ algs = &cost->memset[TARGET_64BIT != 0];
+ else
+ algs = &cost->memcpy[TARGET_64BIT != 0];
+ if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
+ return stringop_alg;
+ /* rep; movq or rep; movl is the smallest variant. */
+ else if (!optimize_for_speed)
+ {
+ if (!count || (count & 3))
+ return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
+ else
+ return rep_prefix_usable ? rep_prefix_4_byte : loop;
+ }
+ /* Very tiny blocks are best handled via the loop; REP is expensive to
+ set up. */
+ else if (expected_size != -1 && expected_size < 4)
+ return loop_1_byte;
+ else if (expected_size != -1)
+ {
+ unsigned int i;
+ enum stringop_alg alg = libcall;
+ for (i = 0; i < MAX_STRINGOP_ALGS; i++)
+ {
+ /* We get here if the algorithms that were not libcall-based
+ were rep-prefix based and we are unable to use rep prefixes
+ based on global register usage. Break out of the loop and
+ use the heuristic below. */
+ if (algs->size[i].max == 0)
+ break;
+ if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
+ {
+ enum stringop_alg candidate = algs->size[i].alg;
+
+ if (candidate != libcall && ALG_USABLE_P (candidate))
+ alg = candidate;
+ /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
+ last non-libcall inline algorithm. */
+ if (TARGET_INLINE_ALL_STRINGOPS)
+ {
+ /* When the current size is best copied by a libcall, but we are
+ still forced to inline, run the heuristic below that will pick
+ code for medium-sized blocks. */
+ if (alg != libcall)
+ return alg;
+ break;
+ }
+ else if (ALG_USABLE_P (candidate))
+ return candidate;
+ }
+ }
+ gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
+ }
+ /* When asked to inline the call anyway, try to pick a meaningful choice.
+ We look for the maximal size of block that is faster to copy by hand
+ and take blocks of at most that size, guessing that the average size
+ will be roughly half of the maximum.
+
+ If this turns out to be bad, we might simply specify the preferred
+ choice in ix86_costs. */
+ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
+ {
+ int max = -1;
+ enum stringop_alg alg;
+ int i;
+ bool any_alg_usable_p = true;
+
+ for (i = 0; i < MAX_STRINGOP_ALGS; i++)
+ {
+ enum stringop_alg candidate = algs->size[i].alg;
+ any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
+
+ if (candidate != libcall && candidate
+ && ALG_USABLE_P (candidate))
+ max = algs->size[i].max;
+ }
+ /* If there aren't any usable algorithms, then recursing on
+ smaller sizes isn't going to find anything. Just return the
+ simple byte-at-a-time copy loop. */
+ if (!any_alg_usable_p)
+ {
+ /* Pick something reasonable. */
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ *dynamic_check = 128;
+ return loop_1_byte;
+ }
+ if (max == -1)
+ max = 4096;
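+ /* Recurse, guessing the block is about half the maximal inline-copied
+ size; the recursion must settle on an inline algorithm, and with
+ -minline-stringops-dynamically any size of MAX or more takes the
+ runtime libcall path. */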
+ alg = decide_alg (count, max / 2, memset, dynamic_check);
+ gcc_assert (*dynamic_check == -1);
+ gcc_assert (alg != libcall);
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ *dynamic_check = max;
+ return alg;
+ }
+ return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
+#undef ALG_USABLE_P
+}
+
+/* Decide on alignment. We know that the operand is already aligned to ALIGN
+ (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
+static int
+decide_alignment (int align,
+ enum stringop_alg alg,
+ int expected_size)
+{
+ int desired_align = 0;
+ switch (alg)
+ {
+ case no_stringop:
+ gcc_unreachable ();
+ case loop:
+ case unrolled_loop:
+ desired_align = GET_MODE_SIZE (Pmode);
+ break;
+ case rep_prefix_8_byte:
+ desired_align = 8;
+ break;
+ case rep_prefix_4_byte:
+ /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
+ copying a whole cache line at once. */
+ if (TARGET_PENTIUMPRO)
+ desired_align = 8;
+ else
+ desired_align = 4;
+ break;
+ case rep_prefix_1_byte:
+ /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
+ copying a whole cache line at once. */
+ if (TARGET_PENTIUMPRO)
+ desired_align = 8;
+ else
+ desired_align = 1;
+ break;
+ case loop_1_byte:
+ desired_align = 1;
+ break;
+ case libcall:
+ return 0;
+ }
+
+ if (optimize_size)
+ desired_align = 1;
+ if (desired_align < align)
+ desired_align = align;
+ if (expected_size != -1 && expected_size < 4)
+ desired_align = align;
+ return desired_align;
+}
+
+/* Return the smallest power of 2 greater than VAL. */
+static int
+smallest_pow2_greater_than (int val)
+{
+ int ret = 1;
+ while (ret <= val)
+ ret <<= 1;
+ return ret;
+}
+
+/* Expand string move (memcpy) operation. Use i386 string operations when
+ profitable. expand_setmem contains similar code. The code depends upon
+ architecture, block size and alignment, but always has the same
+ overall structure:
+
+ 1) Prologue guard: A conditional that jumps ahead to the epilogue for
+ small blocks that the epilogue alone can handle. This is faster but
+ also needed for correctness, since the prologue assumes the block is
+ larger than the desired alignment.
+
+ An optional dynamic check for the size, with a libcall fallback for
+ large blocks, is also emitted here under -minline-stringops-dynamically.
+
+ 2) Prologue: copy the first few bytes to get the destination aligned to
+ DESIRED_ALIGN. It is emitted only when ALIGN is less than
+ DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
+ We emit either a jump tree (for power-of-two-sized blocks) or a byte loop.
+
+ 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
+ with the specified algorithm.
+
+ 4) Epilogue: code copying the tail of the block that is too small to be
+ handled by the main body (or up to the size guarded by the prologue
+ guard). */
+
+int
+ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
+ rtx expected_align_exp, rtx expected_size_exp)
+{
+ rtx destreg;
+ rtx srcreg;
+ rtx label = NULL;
+ rtx tmp;
+ rtx jump_around_label = NULL;
+ HOST_WIDE_INT align = 1;
+ unsigned HOST_WIDE_INT count = 0;
+ HOST_WIDE_INT expected_size = -1;
+ int size_needed = 0, epilogue_size_needed;
+ int desired_align = 0, align_bytes = 0;
+ enum stringop_alg alg;
+ int dynamic_check;
+ bool need_zero_guard = false;
+
+ if (CONST_INT_P (align_exp))
+ align = INTVAL (align_exp);
+ /* i386 can do misaligned access at reasonably increased cost. */
+ if (CONST_INT_P (expected_align_exp)
+ && INTVAL (expected_align_exp) > align)
+ align = INTVAL (expected_align_exp);
+ /* ALIGN is the minimum of destination and source alignment, but here we
+ care only about destination alignment. */
+ else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
+ align = MEM_ALIGN (dst) / BITS_PER_UNIT;
+
+ if (CONST_INT_P (count_exp))
+ count = expected_size = INTVAL (count_exp);
+ if (CONST_INT_P (expected_size_exp) && count == 0)
+ expected_size = INTVAL (expected_size_exp);
+
+ /* Make sure we don't need to care about overflow later on. */
+ if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
+ return 0;
+
+ /* Step 0: Decide on preferred algorithm, desired alignment and
+ size of chunks to be copied by main loop. */
+
+ alg = decide_alg (count, expected_size, false, &dynamic_check);
+ desired_align = decide_alignment (align, alg, expected_size);
+
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = desired_align;
+
+ if (alg == libcall)
+ return 0;
+ gcc_assert (alg != no_stringop);
+ if (!count)
+ count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode);
+ break;
+ case unrolled_loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
+ break;
+ case rep_prefix_8_byte:
+ size_needed = 8;
+ break;
+ case rep_prefix_4_byte:
+ size_needed = 4;
+ break;
+ case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
+ case loop_1_byte:
+ need_zero_guard = true;
+ size_needed = 1;
+ break;
+ }
+
+ epilogue_size_needed = size_needed;
+
+ /* Step 1: Prologue guard. */
+
+ /* Alignment code needs count to be in register. */
+ if (CONST_INT_P (count_exp) && desired_align > align)
+ {
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ count_exp = force_reg (counter_mode (count_exp), count_exp);
+ }
+ gcc_assert (desired_align >= 1 && align >= 1);
+
+ /* Ensure that alignment prologue won't copy past end of block. */
+ if (size_needed > 1 || (desired_align > 1 && desired_align > align))
+ {
+ epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
+ /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
+ Make sure it is power of 2. */
+ epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
+
+ if (count)
+ {
+ if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ {
+ /* If the main algorithm works on QImode, no epilogue is needed.
+ For small sizes just don't align anything. */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
+ }
+ }
+ else
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (epilogue_size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1 || expected_size < epilogue_size_needed)
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ }
+ }
+
+ /* Emit code to decide on runtime whether library call or inline should be
+ used. */
+ if (dynamic_check != -1)
+ {
+ if (CONST_INT_P (count_exp))
+ {
+ if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
+ {
+ emit_block_move_via_libcall (dst, src, count_exp, false);
+ count_exp = const0_rtx;
+ goto epilogue;
+ }
+ }
+ else
+ {
+ rtx hot_label = gen_label_rtx ();
+ jump_around_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
+ LEU, 0, GET_MODE (count_exp), 1, hot_label);
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ emit_block_move_via_libcall (dst, src, count_exp, false);
+ emit_jump (jump_around_label);
+ emit_label (hot_label);
+ }
+ }
+
+ /* Step 2: Alignment prologue. */
+
+ if (desired_align > align)
+ {
+ if (align_bytes == 0)
+ {
+ /* Except for the first move in the epilogue, we no longer know
+ the constant offset in aliasing info. It doesn't seem worth
+ the pain to maintain it for the first move, so throw away
+ the info early. */
+ src = change_address (src, BLKmode, srcreg);
+ dst = change_address (dst, BLKmode, destreg);
+ expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+ /* If we know how many bytes need to be stored before dst is
+ sufficiently aligned, maintain aliasing info accurately. */
+ dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard
+ && (count < (unsigned HOST_WIDE_INT) size_needed
+ || (align_bytes == 0
+ && count < ((unsigned HOST_WIDE_INT) size_needed
+ + desired_align - align))))
+ {
+ /* It is possible that we copied enough so the main loop will not
+ execute. */
+ gcc_assert (size_needed > 1);
+ if (label == NULL_RTX)
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
+ }
+ if (label && size_needed == 1)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL;
+ epilogue_size_needed = 1;
+ }
+ else if (label == NULL_RTX)
+ epilogue_size_needed = size_needed;
+
+ /* Step 3: Main loop. */
+
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop_1_byte:
+ expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
+ count_exp, QImode, 1, expected_size);
+ break;
+ case loop:
+ expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
+ count_exp, Pmode, 1, expected_size);
+ break;
+ case unrolled_loop:
+ /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
+ enough registers for 4 temporaries anyway. */
+ expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
+ count_exp, Pmode, TARGET_64BIT ? 4 : 2,
+ expected_size);
+ break;
+ case rep_prefix_8_byte:
+ expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
+ DImode);
+ break;
+ case rep_prefix_4_byte:
+ expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
+ SImode);
+ break;
+ case rep_prefix_1_byte:
+ expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
+ QImode);
+ break;
+ }
+ /* Properly adjust the offsets of the src and dest memory for aliasing. */
+ if (CONST_INT_P (count_exp))
+ {
+ src = adjust_automodify_address_nv (src, BLKmode, srcreg,
+ (count / size_needed) * size_needed);
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
+ (count / size_needed) * size_needed);
+ }
+ else
+ {
+ src = change_address (src, BLKmode, srcreg);
+ dst = change_address (dst, BLKmode, destreg);
+ }
+
+ /* Step 4: Epilogue to copy the remaining bytes. */
+ epilogue:
+ if (label)
+ {
+ /* When the main loop is done, COUNT_EXP might hold the original count,
+ while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
+ Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
+ bytes. Compensate if needed. */
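+
+ /* Example (illustrative, values assumed): count == 23 with
+ size_needed == 8 leaves 23 & 7 == 7 trailing bytes for the
+ epilogue to copy. */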
+
+ if (size_needed < epilogue_size_needed)
+ {
+ tmp =
+ expand_simple_binop (counter_mode (count_exp), AND, count_exp,
+ GEN_INT (size_needed - 1), count_exp, 1,
+ OPTAB_DIRECT);
+ if (tmp != count_exp)
+ emit_move_insn (count_exp, tmp);
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (count_exp != const0_rtx && epilogue_size_needed > 1)
+ expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
+ epilogue_size_needed);
+ if (jump_around_label)
+ emit_label (jump_around_label);
+ return 1;
+}
+
+/* Helper function for memset. For QImode value 0xXY produce
+ 0xXYXYXYXY of the width specified by MODE. This is essentially
+ a multiplication by 0x01010101 (0x0101010101010101 for DImode),
+ but we can do slightly better than synth_mult by unwinding the
+ sequence by hand on CPUs with a slow multiply. */
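+/* Illustration (values assumed, not from the original source): for
+ val == 0xAB the unwound sequence computes
+ v = 0x00AB; v |= v << 8; -> 0xABAB
+ v |= v << 16; -> 0xABABABAB
+ and for DImode one more v |= v << 32; -> 0xABABABABABABABAB,
+ the same result as val * 0x01010101 (0x0101010101010101). */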
+static rtx
+promote_duplicated_reg (enum machine_mode mode, rtx val)
+{
+ enum machine_mode valmode = GET_MODE (val);
+ rtx tmp;
+ int nops = mode == DImode ? 3 : 2;
+
+ gcc_assert (mode == SImode || mode == DImode);
+ if (val == const0_rtx)
+ return copy_to_mode_reg (mode, const0_rtx);
+ if (CONST_INT_P (val))
+ {
+ HOST_WIDE_INT v = INTVAL (val) & 255;
+
+ v |= v << 8;
+ v |= v << 16;
+ if (mode == DImode)
+ v |= (v << 16) << 16;
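+ /* The double shift above presumably avoids a shift by 32, which
+ would be undefined when HOST_WIDE_INT is only 32 bits wide. */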
+ return copy_to_mode_reg (mode, gen_int_mode (v, mode));
+ }
+
+ if (valmode == VOIDmode)
+ valmode = QImode;
+ if (valmode != QImode)
+ val = gen_lowpart (QImode, val);
+ if (mode == QImode)
+ return val;
+ if (!TARGET_PARTIAL_REG_STALL)
+ nops--;
+ if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
+ + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
+ <= (ix86_cost->shift_const + ix86_cost->add) * nops
+ + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
+ {
+ rtx reg = convert_modes (mode, QImode, val, true);
+ tmp = promote_duplicated_reg (mode, const1_rtx);
+ return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
+ OPTAB_DIRECT);
+ }
+ else
+ {
+ rtx reg = convert_modes (mode, QImode, val, true);
+
+ if (!TARGET_PARTIAL_REG_STALL)
+ {
+ if (mode == SImode)
+ emit_insn (gen_movsi_insv_1 (reg, reg));
+ else
+ emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
+ }
+ else
+ {
+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
+ NULL, 1, OPTAB_DIRECT);
+ reg =
+ expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
+ }
+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
+ if (mode == SImode)
+ return reg;
+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
+ NULL, 1, OPTAB_DIRECT);
+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
+ return reg;
+ }
+}
+
+/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
+ will be needed by the main loop copying SIZE_NEEDED chunks and by the
+ prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
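+/* Example (illustrative, values assumed): on a 64-bit target with
+ size_needed == 8 the value is promoted to DImode; with
+ size_needed == 4 and no extra alignment work an SImode promotion
+ is enough. */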
+static rtx
+promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
+{
+ rtx promoted_val;
+
+ if (TARGET_64BIT
+ && (size_needed > 4 || (desired_align > align && desired_align > 4)))
+ promoted_val = promote_duplicated_reg (DImode, val);
+ else if (size_needed > 2 || (desired_align > align && desired_align > 2))
+ promoted_val = promote_duplicated_reg (SImode, val);
+ else if (size_needed > 1 || (desired_align > align && desired_align > 1))
+ promoted_val = promote_duplicated_reg (HImode, val);
+ else
+ promoted_val = val;
+
+ return promoted_val;
+}
+
+/* Expand a string set operation (memset, bzero). Use i386 string
+ operations when profitable. See the expand_movmem comment for an
+ explanation of the individual steps performed. */
+int
+ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
+ rtx expected_align_exp, rtx expected_size_exp)
+{
+ rtx destreg;
+ rtx label = NULL;
+ rtx tmp;
+ rtx jump_around_label = NULL;
+ HOST_WIDE_INT align = 1;
+ unsigned HOST_WIDE_INT count = 0;
+ HOST_WIDE_INT expected_size = -1;
+ int size_needed = 0, epilogue_size_needed;
+ int desired_align = 0, align_bytes = 0;
+ enum stringop_alg alg;
+ rtx promoted_val = NULL;
+ bool force_loopy_epilogue = false;
+ int dynamic_check;
+ bool need_zero_guard = false;
+
+ if (CONST_INT_P (align_exp))
+ align = INTVAL (align_exp);
+ /* i386 can do misaligned access at reasonably increased cost. */
+ if (CONST_INT_P (expected_align_exp)
+ && INTVAL (expected_align_exp) > align)
+ align = INTVAL (expected_align_exp);
+ if (CONST_INT_P (count_exp))
+ count = expected_size = INTVAL (count_exp);
+ if (CONST_INT_P (expected_size_exp) && count == 0)
+ expected_size = INTVAL (expected_size_exp);
+
+ /* Make sure we don't need to care about overflow later on. */
+ if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
+ return 0;
+
+ /* Step 0: Decide on preferred algorithm, desired alignment and
+ size of chunks to be copied by main loop. */
+
+ alg = decide_alg (count, expected_size, true, &dynamic_check);
+ desired_align = decide_alignment (align, alg, expected_size);
+
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = desired_align;
+
+ if (alg == libcall)
+ return 0;
+ gcc_assert (alg != no_stringop);
+ if (!count)
+ count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode);
+ break;
+ case unrolled_loop:
+ need_zero_guard = true;
+ size_needed = GET_MODE_SIZE (Pmode) * 4;
+ break;
+ case rep_prefix_8_byte:
+ size_needed = 8;
+ break;
+ case rep_prefix_4_byte:
+ size_needed = 4;
+ break;
+ case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
+ case loop_1_byte:
+ need_zero_guard = true;
+ size_needed = 1;
+ break;
+ }
+ epilogue_size_needed = size_needed;
+
+ /* Step 1: Prologue guard. */
+
+ /* Alignment code needs count to be in a register. */
+ if (CONST_INT_P (count_exp) && desired_align > align)
+ {
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ {
+ enum machine_mode mode = SImode;
+ if (TARGET_64BIT && (count & ~0xffffffff))
+ mode = DImode;
+ count_exp = force_reg (mode, count_exp);
+ }
+ }
+ /* Do the cheap promotion to allow better CSE across the
+ main loop and epilogue (i.e. one load of the big constant in
+ front of all the code). */
+ if (CONST_INT_P (val_exp))
+ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
+ desired_align, align);
+ /* Ensure that alignment prologue won't copy past end of block. */
+ if (size_needed > 1 || (desired_align > 1 && desired_align > align))
+ {
+ epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
+ /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
+ Make sure it is power of 2. */
+ epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
+
+ /* To improve performance of small blocks, we jump around the VAL
+ promoting code. This means that if the promoted VAL is not constant,
+ we might not use it in the epilogue and have to use the byte
+ loop variant. */
+ if (epilogue_size_needed > 2 && !promoted_val)
+ force_loopy_epilogue = true;
+ if (count)
+ {
+ if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ {
+ /* If the main algorithm works on QImode, no epilogue is needed.
+ For small sizes just don't align anything. */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
+ }
+ }
+ else
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (epilogue_size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1 || expected_size <= epilogue_size_needed)
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ }
+ }
+ if (dynamic_check != -1)
+ {
+ rtx hot_label = gen_label_rtx ();
+ jump_around_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
+ LEU, 0, counter_mode (count_exp), 1, hot_label);
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ set_storage_via_libcall (dst, count_exp, val_exp, false);
+ emit_jump (jump_around_label);
+ emit_label (hot_label);
+ }
+
+ /* Step 2: Alignment prologue. */
+
+ /* Do the expensive promotion once we branched off the small blocks. */
+ if (!promoted_val)
+ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
+ desired_align, align);
+ gcc_assert (desired_align >= 1 && align >= 1);
+
+ if (desired_align > align)
+ {
+ if (align_bytes == 0)
+ {
+ /* Except for the first move in the epilogue, we no longer know
+ the constant offset in aliasing info. It doesn't seem worth
+ the pain to maintain it for the first move, so throw away
+ the info early. */
+ dst = change_address (dst, BLKmode, destreg);
+ expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+ /* If we know how many bytes need to be stored before dst is
+ sufficiently aligned, maintain aliasing info accurately. */
+ dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard
+ && (count < (unsigned HOST_WIDE_INT) size_needed
+ || (align_bytes == 0
+ && count < ((unsigned HOST_WIDE_INT) size_needed
+ + desired_align - align))))
+ {
+ /* It is possible that we copied enough so the main loop will not
+ execute. */
+ gcc_assert (size_needed > 1);
+ if (label == NULL_RTX)
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
+ }
+ if (label && size_needed == 1)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ label = NULL;
+ promoted_val = val_exp;
+ epilogue_size_needed = 1;
+ }
+ else if (label == NULL_RTX)
+ epilogue_size_needed = size_needed;
+
+ /* Step 3: Main loop. */
+
+ switch (alg)
+ {
+ case libcall:
+ case no_stringop:
+ gcc_unreachable ();
+ case loop_1_byte:
+ expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
+ count_exp, QImode, 1, expected_size);
+ break;
+ case loop:
+ expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
+ count_exp, Pmode, 1, expected_size);
+ break;
+ case unrolled_loop:
+ expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
+ count_exp, Pmode, 4, expected_size);
+ break;
+ case rep_prefix_8_byte:
+ expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
+ DImode, val_exp);
+ break;
+ case rep_prefix_4_byte:
+ expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
+ SImode, val_exp);
+ break;
+ case rep_prefix_1_byte:
+ expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
+ QImode, val_exp);
+ break;
+ }
+ /* Properly adjust the offset of the dest memory for aliasing. */
+ if (CONST_INT_P (count_exp))
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
+ (count / size_needed) * size_needed);
+ else
+ dst = change_address (dst, BLKmode, destreg);
+
+ /* Step 4: Epilogue to copy the remaining bytes. */
+
+ if (label)
+ {
+ /* When the main loop is done, COUNT_EXP might hold the original count,
+ while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
+ Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
+ bytes. Compensate if needed. */
+
+ if (size_needed < epilogue_size_needed)
+ {
+ tmp =
+ expand_simple_binop (counter_mode (count_exp), AND, count_exp,
+ GEN_INT (size_needed - 1), count_exp, 1,
+ OPTAB_DIRECT);
+ if (tmp != count_exp)
+ emit_move_insn (count_exp, tmp);
+ }
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ epilogue:
+ if (count_exp != const0_rtx && epilogue_size_needed > 1)
+ {
+ if (force_loopy_epilogue)
+ expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
+ epilogue_size_needed);
+ else
+ expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
+ epilogue_size_needed);
+ }
+ if (jump_around_label)
+ emit_label (jump_around_label);
+ return 1;
+}
+
+/* Expand the appropriate insns for doing strlen if not just doing
+ repnz; scasb
+
+ out = result, initialized with the start address
+ align_rtx = alignment of the address.
+ scratch = scratch register, initialized with the start address when
+ not aligned, otherwise undefined
+
+ This is just the body. It needs the initializations mentioned above and
+ some address computing at the end. These things are done in i386.md. */
+
+static void
+ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
+{
+ int align;
+ rtx tmp;
+ rtx align_2_label = NULL_RTX;
+ rtx align_3_label = NULL_RTX;
+ rtx align_4_label = gen_label_rtx ();
+ rtx end_0_label = gen_label_rtx ();
+ rtx mem;
+ rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx cmp;
+
+ align = 0;
+ if (CONST_INT_P (align_rtx))
+ align = INTVAL (align_rtx);
+
+ /* Loop to check 1..3 bytes for null to get an aligned pointer. */
+
+ /* Is there a known alignment and is it less than 4? */
+ if (align < 4)
+ {
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
+ /* Is there a known alignment and is it not 2? */
+ if (align != 2)
+ {
+ align_3_label = gen_label_rtx (); /* Label when address is 3 mod 4 */
+ align_2_label = gen_label_rtx (); /* Label when address is 2 mod 4 */
+
+ /* Leave just the 2 lower bits. */
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
+ Pmode, 1, align_2_label);
+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
+ Pmode, 1, align_3_label);
+ }
+ else
+ {
+ /* Since the alignment is 2, we have to check 2 or 0 bytes;
+ check whether the address is 4-byte aligned. */
+
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
+ Pmode, 1, align_4_label);
+ }
+
+ mem = change_address (src, QImode, out);
+
+ /* Now compare the bytes. */
+
+ /* Compare the first n unaligned bytes on a byte-by-byte basis. */
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
+ QImode, 1, end_0_label);
+
+ /* Increment the address. */
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+
+ /* Not needed with an alignment of 2. */
+ if (align != 2)
+ {
+ emit_label (align_2_label);
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+
+ emit_label (align_3_label);
+ }
+
+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
+ end_0_label);
+
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
+ }
+
+ /* Generate a loop to check 4 bytes at a time. Aligning this loop is
+ not a good idea: it only makes programs larger and does not help
+ them run faster. */
+ emit_label (align_4_label);
+
+ mem = change_address (src, SImode, out);
+ emit_move_insn (scratch, mem);
+ emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
+
+ /* This formula yields a nonzero result iff one of the bytes is zero.
+ This saves three branches inside the loop and many cycles. */
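+
+ /* Illustration (values assumed): the sequence below computes
+ (x - 0x01010101) & ~x & 0x80808080.
+ For x == 0x12003456 this gives 0x00800000 != 0, flagging the
+ zero byte; for x == 0x12345678 it gives 0. Bits above the
+ lowest zero byte can be false positives, but only the lowest
+ one matters for strlen. */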
+
+ emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
+ emit_insn (gen_one_cmplsi2 (scratch, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg,
+ gen_int_mode (0x80808080, SImode)));
+ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
+ align_4_label);
+
+ if (TARGET_CMOVE)
+ {
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, tmpreg);
+ emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
+
+ /* If zero is not in the first two bytes, move two bytes forward. */
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
+ gen_rtx_IF_THEN_ELSE (SImode, tmp,
+ reg,
+ tmpreg)));
+ /* Emit lea manually to avoid clobbering the flags. */
+ emit_insn (gen_rtx_SET (SImode, reg2,
+ gen_rtx_PLUS (Pmode, out, const2_rtx)));
+
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, out,
+ gen_rtx_IF_THEN_ELSE (Pmode, tmp,
+ reg2,
+ out)));
+
+ }
+ else
+ {
+ rtx end_2_label = gen_label_rtx ();
+ /* Is zero in the first two bytes? */
+
+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, end_2_label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = end_2_label;
+
+ /* Not in the first two. Move two bytes forward. */
+ emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
+ emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
+
+ emit_label (end_2_label);
+
+ }
+
+ /* Avoid a branch in fixing up the final byte position. */
+ tmpreg = gen_lowpart (QImode, tmpreg);
+ emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
+ emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
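+
+ /* How the branchless fixup above works (illustrative sketch): bit 7
+ of the low byte of TMPREG is set iff the zero byte is the
+ lower-addressed of the two remaining candidates. Adding TMPREG to
+ itself moves that bit into the carry flag, so OUT - 3 - carry
+ subtracts 4 when the zero is at the lower-addressed byte and 3
+ otherwise, leaving OUT pointing at the terminating zero. */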
+
+ emit_label (end_0_label);
+}
+
+/* Expand strlen. */
+
+int
+ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
+{
+ rtx addr, scratch1, scratch2, scratch3, scratch4;
+
+ /* The generic case of the strlen expander is long. Avoid expanding
+ it unless TARGET_INLINE_ALL_STRINGOPS. */
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !TARGET_INLINE_ALL_STRINGOPS
+ && !optimize_insn_for_size_p ()
+ && (!CONST_INT_P (align) || INTVAL (align) < 4))
+ return 0;
+
+ addr = force_reg (Pmode, XEXP (src, 0));
+ scratch1 = gen_reg_rtx (Pmode);
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !optimize_insn_for_size_p ())
+ {
+ /* Well it seems that some optimizer does not combine a call like
+ foo(strlen(bar), strlen(bar));
+ when the move and the subtraction are done here. It does calculate
+ the length just once when these instructions are done inside of
+ output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
+ often used and I use one fewer register for the lifetime of
+ output_strlen_unroll() this is better. */
+
+ emit_move_insn (out, addr);
+
+ ix86_expand_strlensi_unroll_1 (out, src, align);
+
+ /* strlensi_unroll_1 returns the address of the zero at the end of
+ the string, like memchr(), so compute the length by subtracting
+ the start address. */
+ emit_insn ((*ix86_gen_sub3) (out, out, addr));
+ }
+ else
+ {
+ rtx unspec;
+
+ /* Can't use this if the user has appropriated eax, ecx, or edi. */
+ if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
+ return false;
+
+ scratch2 = gen_reg_rtx (Pmode);
+ scratch3 = gen_reg_rtx (Pmode);
+ scratch4 = force_reg (Pmode, constm1_rtx);
+
+ emit_move_insn (scratch3, addr);
+ eoschar = force_reg (QImode, eoschar);
+
+ src = replace_equiv_address_nv (src, scratch3);
+
+ /* If .md starts supporting :P, this can be done in .md. */
+ unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
+ scratch4), UNSPEC_SCAS);
+ emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
+ emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
+ emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
+ }
+ return 1;
+}
+
+/* For a given symbol (function), construct code to compute the
+ address of its PLT entry in the large x86-64 PIC model. */
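+/* Illustration (assumed encoding, register names hypothetical): this
+ materializes roughly
+ movabs $symbol@PLTOFF, %tmp
+ add %pic_base, %tmp
+ where %pic_base is the register holding the GOT base
+ (pic_offset_table_rtx). */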
+rtx
+construct_plt_address (rtx symbol)
+{
+ rtx tmp = gen_reg_rtx (Pmode);
+ rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
+
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ gcc_assert (ix86_cmodel == CM_LARGE_PIC);
+
+ emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
+ emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
+ return tmp;
+}
+
+void
+ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
+ rtx callarg2,
+ rtx pop, int sibcall)
+{
+ rtx use = NULL, call;
+
+ if (pop == const0_rtx)
+ pop = NULL;
+ gcc_assert (!TARGET_64BIT || !pop);
+
+ if (TARGET_MACHO && !TARGET_64BIT)
+ {
+#if TARGET_MACHO
+ if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ fnaddr = machopic_indirect_call_target (fnaddr);
+#endif
+ }
+ else
+ {
+ /* Static functions and indirect calls don't need the pic register. */
+ if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
+ use_reg (&use, pic_offset_table_rtx);
+ }
+
+ if (TARGET_64BIT && INTVAL (callarg2) >= 0)
+ {
+ rtx al = gen_rtx_REG (QImode, AX_REG);
+ emit_move_insn (al, callarg2);
+ use_reg (&use, al);
+ }
+
+ if (ix86_cmodel == CM_LARGE_PIC
+ && GET_CODE (fnaddr) == MEM
+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
+ fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
+ else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
+ if (sibcall && TARGET_64BIT
+ && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
+ {
+ rtx addr;
+ addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
+ fnaddr = gen_rtx_REG (Pmode, R11_REG);
+ emit_move_insn (fnaddr, addr);
+ fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ }
+
+ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
+ if (retval)
+ call = gen_rtx_SET (VOIDmode, retval, call);
+ if (pop)
+ {
+ pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
+ pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ }
+ if (TARGET_64BIT
+ && ix86_cfun_abi () == MS_ABI
+ && (!callarg2 || INTVAL (callarg2) != -2))
+ {
+ /* We need to represent that SI and DI registers are clobbered
+ by SYSV calls. */
+ static int clobbered_registers[] = {
+ XMM6_REG, XMM7_REG, XMM8_REG,
+ XMM9_REG, XMM10_REG, XMM11_REG,
+ XMM12_REG, XMM13_REG, XMM14_REG,
+ XMM15_REG, SI_REG, DI_REG
+ };
+ unsigned int i;
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
+ rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
+
+ vec[0] = call;
+ vec[1] = unspec;
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG
+ (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
+
+ call = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
+ + 2, vec));
+ }
+
+ call = emit_call_insn (call);
+ if (use)
+ CALL_INSN_FUNCTION_USAGE (call) = use;
+}
+
+
+/* Clear stack slot assignments remembered from previous functions.
+ This is called from INIT_EXPANDERS once before RTL is emitted for each
+ function. */
+
+static struct machine_function *
+ix86_init_machine_status (void)
+{
+ struct machine_function *f;
+
+ f = GGC_CNEW (struct machine_function);
+ f->use_fast_prologue_epilogue_nregs = -1;
+ f->tls_descriptor_call_expanded_p = 0;
+ f->call_abi = DEFAULT_ABI;
+
+ return f;
+}
+
+/* Return a MEM corresponding to a stack slot with mode MODE.
+ Allocate a new slot if necessary.
+
+ The RTL for a function can have several slots available: N is
+ which slot to use. */
+
+rtx
+assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
+{
+ struct stack_local_entry *s;
+
+ gcc_assert (n < MAX_386_STACK_LOCALS);
+
+ /* Virtual slot is valid only before vregs are instantiated. */
+ gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
+
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->mode == mode && s->n == n)
+ return copy_rtx (s->rtl);
+
+ s = (struct stack_local_entry *)
+ ggc_alloc (sizeof (struct stack_local_entry));
+ s->n = n;
+ s->mode = mode;
+ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
+
+ s->next = ix86_stack_locals;
+ ix86_stack_locals = s;
+ return s->rtl;
+}
+
+/* Construct the SYMBOL_REF for the tls_get_addr function. */
+
+static GTY(()) rtx ix86_tls_symbol;
+rtx
+ix86_tls_get_addr (void)
+{
+
+ if (!ix86_tls_symbol)
+ {
+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ (TARGET_ANY_GNU_TLS
+ && !TARGET_64BIT)
+ ? "___tls_get_addr"
+ : "__tls_get_addr");
+ }
+
+ return ix86_tls_symbol;
+}
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx ix86_tls_module_base_symbol;
+rtx
+ix86_tls_module_base (void)
+{
+
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
+ "_TLS_MODULE_BASE_");
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
+}
+
+/* Calculate the length of the memory address in the instruction
+ encoding. Does not include the one-byte modrm, opcode, or prefix. */
+
+int
+memory_address_length (rtx addr)
+{
+ struct ix86_address parts;
+ rtx base, index, disp;
+ int len;
+ int ok;
+
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC
+ || GET_CODE (addr) == PRE_MODIFY
+ || GET_CODE (addr) == POST_MODIFY)
+ return 0;
+
+ ok = ix86_decompose_address (addr, &parts);
+ gcc_assert (ok);
+
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ parts.base = SUBREG_REG (parts.base);
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ parts.index = SUBREG_REG (parts.index);
+
+ base = parts.base;
+ index = parts.index;
+ disp = parts.disp;
+ len = 0;
+
+ /* Rule of thumb:
+ - esp as the base always wants an index,
+ - ebp as the base always wants a displacement. */
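+
+ /* Examples (illustrative): (%eax) needs no extra bytes (len 0);
+ (%esp) needs a SIB byte (len 1); 16(%ebp) needs a disp8 (len 1);
+ foo(,%eax,4) needs a SIB byte plus a disp32 (len 5). */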
+
+ /* Register Indirect. */
+ if (base && !index && !disp)
+ {
+ /* esp (for its index) and ebp (for its displacement) need
+ the two-byte modrm form. */
+ if (addr == stack_pointer_rtx
+ || addr == arg_pointer_rtx
+ || addr == frame_pointer_rtx
+ || addr == hard_frame_pointer_rtx)
+ len = 1;
+ }
+
+ /* Direct Addressing. */
+ else if (disp && !base && !index)
+ len = 4;
+
+ else
+ {
+ /* Find the length of the displacement constant. */
+ if (disp)
+ {
+ if (base && satisfies_constraint_K (disp))
+ len = 1;
+ else
+ len = 4;
+ }
+ /* ebp always wants a displacement. */
+ else if (base == hard_frame_pointer_rtx)
+ len = 1;
+
+ /* An index requires the two-byte modrm form.... */
+ if (index
+ /* ...like esp, which always wants an index. */
+ || base == stack_pointer_rtx
+ || base == arg_pointer_rtx
+ || base == frame_pointer_rtx)
+ len += 1;
+ }
+
+ return len;
+}
+
+/* Compute default value for "length_immediate" attribute. When SHORTFORM
+ is set, expect that the insn has an 8-bit immediate alternative. */
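+/* Example (illustrative, instructions assumed): "addl $100000, %eax"
+ carries a 4-byte immediate (MODE_SI), while with SHORTFORM set
+ "addl $4, %eax" satisfies constraint K (signed 8-bit immediate)
+ and is counted as a single byte. */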
+int
+ix86_attr_length_immediate_default (rtx insn, int shortform)
+{
+ int len = 0;
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (CONSTANT_P (recog_data.operand[i]))
+ {
+ gcc_assert (!len);
+ if (shortform && satisfies_constraint_K (recog_data.operand[i]))
+ len = 1;
+ else
+ {
+ switch (get_attr_mode (insn))
+ {
+ case MODE_QI:
+ len += 1;
+ break;
+ case MODE_HI:
+ len += 2;
+ break;
+ case MODE_SI:
+ len += 4;
+ break;
+ /* Immediates for DImode instructions are encoded as
+ 32-bit sign-extended values. */
+ case MODE_DI:
+ len += 4;
+ break;
+ default:
+ fatal_insn ("unknown insn mode", insn);
+ }
+ }
+ }
+ return len;
+}
+/* Compute default value for "length_address" attribute. */
+int
+ix86_attr_length_address_default (rtx insn)
+{
+ int i;
+
+ if (get_attr_type (insn) == TYPE_LEA)
+ {
+ rtx set = PATTERN (insn);
+
+ if (GET_CODE (set) == PARALLEL)
+ set = XVECEXP (set, 0, 0);
+
+ gcc_assert (GET_CODE (set) == SET);
+
+ return memory_address_length (SET_SRC (set));
+ }
+
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ return memory_address_length (XEXP (recog_data.operand[i], 0));
+ return 0;
+}
+
+/* Compute default value for "length_vex" attribute. It includes
+ the 2- or 3-byte VEX prefix and 1 opcode byte. */
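+/* Example (illustrative): an insn with only the 0f escape and no
+ VEX.W or extended-register bits needs 2 (prefix) + 1 (opcode) = 3
+ bytes; VEX.W, or DImode/extended registers in 64-bit mode, forces
+ 3 + 1 = 4 bytes. */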
+
+int
+ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
+ int has_vex_w)
+{
+ int i;
+
+ /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W
+ bit needs the 3-byte VEX prefix. */
+ if (!has_0f_opcode || has_vex_w)
+ return 3 + 1;
+
+ /* We can always use the 2-byte VEX prefix in 32-bit mode. */
+ if (!TARGET_64BIT)
+ return 2 + 1;
+
+ extract_insn_cached (insn);
+
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (REG_P (recog_data.operand[i]))
+ {
+ /* REX.W bit uses 3 byte VEX prefix. */
+ if (GET_MODE (recog_data.operand[i]) == DImode)
+ return 3 + 1;
+ }
+ else
+ {
+ /* REX.X or REX.B bits use 3 byte VEX prefix. */
+ if (MEM_P (recog_data.operand[i])
+ && x86_extended_reg_mentioned_p (recog_data.operand[i]))
+ return 3 + 1;
+ }
+
+ return 2 + 1;
+}
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+static int
+ix86_issue_rate (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ case PROCESSOR_K6:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_NOCONA:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ return 3;
+
+ case PROCESSOR_CORE2:
+ return 4;
+
+ default:
+ return 1;
+ }
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
+ set by DEP_INSN and nothing else set by DEP_INSN. */
+
+static int
+ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx set, set2;
+
+ /* Simplify the test for uninteresting insns. */
+ if (insn_type != TYPE_SETCC
+ && insn_type != TYPE_ICMOV
+ && insn_type != TYPE_FCMOV
+ && insn_type != TYPE_IBR)
+ return 0;
+
+ if ((set = single_set (dep_insn)) != 0)
+ {
+ set = SET_DEST (set);
+ set2 = NULL_RTX;
+ }
+ else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
+ && XVECLEN (PATTERN (dep_insn), 0) == 2
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
+ {
+ set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
+ set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
+ }
+ else
+ return 0;
+
+ if (!REG_P (set) || REGNO (set) != FLAGS_REG)
+ return 0;
+
+ /* This test is true if the dependent insn reads the flags but
+ not any other potentially set register. */
+ if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
+ return 0;
+
+ if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
+ return 0;
+
+ return 1;
+}
+
+/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
+ address with operands set by DEP_INSN. */
+
+static int
+ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+{
+ rtx addr;
+
+ if (insn_type == TYPE_LEA
+ && TARGET_PENTIUM)
+ {
+ addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ }
+ else
+ {
+ int i;
+ extract_insn_cached (insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ {
+ addr = XEXP (recog_data.operand[i], 0);
+ goto found;
+ }
+ return 0;
+ found:;
+ }
+
+ return modified_in_p (addr, dep_insn);
+}
+
+static int
+ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+{
+ enum attr_type insn_type, dep_insn_type;
+ enum attr_memory memory;
+ rtx set, set2;
+ int dep_insn_code_number;
+
+ /* Anti and output dependencies have zero cost on all CPUs. */
+ if (REG_NOTE_KIND (link) != 0)
+ return 0;
+
+ dep_insn_code_number = recog_memoized (dep_insn);
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_insn_type = get_attr_type (dep_insn);
+
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ /* Address Generation Interlock adds a cycle of latency. */
+ if (ix86_agi_dependent (insn, dep_insn, insn_type))
+ cost += 1;
+
+ /* ??? Compares pair with jump/setcc. */
+ if (ix86_flags_dependent (insn, dep_insn, insn_type))
+ cost = 0;
+
+ /* Floating point stores require the value to be ready one cycle earlier. */
+ if (insn_type == TYPE_FMOV
+ && get_attr_memory (insn) == MEMORY_STORE
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ cost += 1;
+ break;
+
+ case PROCESSOR_PENTIUMPRO:
+ memory = get_attr_memory (insn);
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* There is one cycle extra latency between an FP op and a store. */
+ if (insn_type == TYPE_FMOV
+ && (set = single_set (dep_insn)) != NULL_RTX
+ && (set2 = single_set (insn)) != NULL_RTX
+ && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
+ && MEM_P (SET_DEST (set2)))
+ cost += 1;
+
+ /* Show the ability of the reorder buffer to hide the latency of a
+ load by executing it in parallel with the previous instruction,
+ when the previous instruction is not needed to compute the
+ address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ /* Claim moves to take one cycle, as the core can issue one load
+ at a time and the next load can start a cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 1)
+ cost--;
+ }
+ break;
+
+ case PROCESSOR_K6:
+ memory = get_attr_memory (insn);
+
+ /* The esp dependency is resolved before the instruction is really
+ finished. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 1;
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* Show the ability of the reorder buffer to hide the latency of a
+ load by executing it in parallel with the previous instruction,
+ when the previous instruction is not needed to compute the
+ address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ /* Claim moves to take one cycle, as the core can issue one load
+ at a time and the next load can start a cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 2)
+ cost -= 2;
+ else
+ cost = 1;
+ }
+ break;
+
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_GENERIC32:
+ case PROCESSOR_GENERIC64:
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a
+ load by executing it in parallel with the previous instruction,
+ when the previous instruction is not needed to compute the
+ address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ {
+ enum attr_unit unit = get_attr_unit (insn);
+ int loadcost = 3;
+
+ /* Because of the difference between the lengths of the integer and
+ floating point unit pipeline preparation stages, the memory
+ operands for floating point are cheaper.
+
+ ??? For Athlon the difference is most probably 2. */
+ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+ loadcost = 3;
+ else
+ loadcost = TARGET_ATHLON ? 2 : 0;
+
+ if (cost >= loadcost)
+ cost -= loadcost;
+ else
+ cost = 0;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
+/* How many alternative schedules to try. This should be as wide as the
+ scheduling freedom in the DFA, but no wider. Making this value too
+ large results in extra work for the scheduler. */
+
+static int
+ia32_multipass_dfa_lookahead (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_K6:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+
+/* Compute the alignment given to a constant that is being placed in memory.
+ EXP is the constant and ALIGN is the alignment that the object would
+ ordinarily have.
+ The value of this function is used instead of that alignment to align
+ the object. */
+
+int
+ix86_constant_alignment (tree exp, int align)
+{
+ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
+ || TREE_CODE (exp) == INTEGER_CST)
+ {
+ if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
+ return 64;
+ else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
+ return 128;
+ }
+ else if (!optimize_size && TREE_CODE (exp) == STRING_CST
+ && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
+ return BITS_PER_WORD;
+
+ return align;
+}
+
+/* Compute the alignment for a static variable.
+ TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this function is used
+ instead of that alignment to align the object. */
+
+int
+ix86_data_alignment (tree type, int align)
+{
+ int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
+ && align < max_align)
+ align = max_align;
+
+ /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if ((TYPE_MODE (type) == XCmode
+ || TYPE_MODE (type) == TCmode) && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+
+ return align;
+}
+
+/* Compute the alignment for a local variable or a stack slot. EXP is
+ the data type or decl itself, MODE is the widest mode available and
+ ALIGN is the alignment that the object would ordinarily have. The
+ value of this macro is used instead of that alignment to align the
+ object. */
+
+unsigned int
+ix86_local_alignment (tree exp, enum machine_mode mode,
+ unsigned int align)
+{
+ tree type, decl;
+
+ if (exp && DECL_P (exp))
+ {
+ type = TREE_TYPE (exp);
+ decl = exp;
+ }
+ else
+ {
+ type = exp;
+ decl = NULL;
+ }
+
+ /* Don't do dynamic stack realignment for long long objects with
+ -mpreferred-stack-boundary=2. */
+ if (!TARGET_64BIT
+ && align == 64
+ && ix86_preferred_stack_boundary < 64
+ && (mode == DImode || (type && TYPE_MODE (type) == DImode))
+ && (!type || !TYPE_USER_ALIGN (type))
+ && (!decl || !DECL_USER_ALIGN (decl)))
+ align = 32;
+
+ /* If TYPE is NULL, we are allocating a stack slot for a caller-save
+ register in MODE. We will return the largest alignment of XF
+ and DF. */
+ if (!type)
+ {
+ if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
+ align = GET_MODE_ALIGNMENT (DFmode);
+ return align;
+ }
+
+ /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+ if (TREE_CODE (type) == ARRAY_TYPE)
+ {
+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == COMPLEX_TYPE)
+ {
+ if (TYPE_MODE (type) == DCmode && align < 64)
+ return 64;
+ if ((TYPE_MODE (type) == XCmode
+ || TYPE_MODE (type) == TCmode) && align < 128)
+ return 128;
+ }
+ else if ((TREE_CODE (type) == RECORD_TYPE
+ || TREE_CODE (type) == UNION_TYPE
+ || TREE_CODE (type) == QUAL_UNION_TYPE)
+ && TYPE_FIELDS (type))
+ {
+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
+ return 128;
+ }
+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
+ || TREE_CODE (type) == INTEGER_TYPE)
+ {
+
+ if (TYPE_MODE (type) == DFmode && align < 64)
+ return 64;
+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
+ return 128;
+ }
+ return align;
+}
+
+/* Compute the minimum required alignment for dynamic stack realignment
+ purposes for a local variable, parameter or a stack slot. EXP is
+ the data type or decl itself, MODE is its mode and ALIGN is the
+ alignment that the object would ordinarily have. */
+
+unsigned int
+ix86_minimum_alignment (tree exp, enum machine_mode mode,
+ unsigned int align)
+{
+ tree type, decl;
+
+ if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
+ return align;
+
+ if (exp && DECL_P (exp))
+ {
+ type = TREE_TYPE (exp);
+ decl = exp;
+ }
+ else
+ {
+ type = exp;
+ decl = NULL;
+ }
+
+ /* Don't do dynamic stack realignment for long long objects with
+ -mpreferred-stack-boundary=2. */
+ if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
+ && (!type || !TYPE_USER_ALIGN (type))
+ && (!decl || !DECL_USER_ALIGN (decl)))
+ return 32;
+
+ return align;
+}
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+void
+x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
+{
+ if (!TARGET_64BIT)
+ {
+ /* Compute offset from the end of the jmp to the target function. */
+ rtx disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (tramp, 10),
+ NULL_RTX, 1, OPTAB_DIRECT);
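+
+ /* Byte layout emitted below (illustrative):
+ b9 <cxt:4> movl $cxt, %ecx
+ e9 <disp:4> jmp <fnaddr> (rel32 from the end of the jmp) */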
+ emit_move_insn (gen_rtx_MEM (QImode, tramp),
+ gen_int_mode (0xb9, QImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
+ gen_int_mode (0xe9, QImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
+ }
+ else
+ {
+ int offset = 0;
+ /* Try to load the address using the shorter movl instead of movabs.
+ We may want to support movq for kernel mode, but the kernel does not
+ use trampolines at the moment. */
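+ /* Byte layout emitted below (illustrative, short-form case):
+ 41 bb <fnaddr:4> movl $fnaddr, %r11d
+ 49 ba <cxt:8> movabs $cxt, %r10
+ 49 ff e3 rex.WB jmp *%r11 */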
+ if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
+ {
+ fnaddr = copy_to_mode_reg (DImode, fnaddr);
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xbb41, HImode));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
+ gen_lowpart (SImode, fnaddr));
+ offset += 6;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xbb49, HImode));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ fnaddr);
+ offset += 10;
+ }
+ /* Load static chain using movabs to r10. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xba49, HImode));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ cxt);
+ offset += 10;
+ /* Jump to r11. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ gen_int_mode (0xff49, HImode));
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
+ gen_int_mode (0xe3, QImode));
+ offset += 3;
+ gcc_assert (offset <= TRAMPOLINE_SIZE);
+ }
+
+#ifdef ENABLE_EXECUTE_STACK
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
+ LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
+#endif
+}
+
+/* Codes for all the SSE/MMX builtins. */
+enum ix86_builtins
+{
+ IX86_BUILTIN_ADDPS,
+ IX86_BUILTIN_ADDSS,
+ IX86_BUILTIN_DIVPS,
+ IX86_BUILTIN_DIVSS,
+ IX86_BUILTIN_MULPS,
+ IX86_BUILTIN_MULSS,
+ IX86_BUILTIN_SUBPS,
+ IX86_BUILTIN_SUBSS,
+
+ IX86_BUILTIN_CMPEQPS,
+ IX86_BUILTIN_CMPLTPS,
+ IX86_BUILTIN_CMPLEPS,
+ IX86_BUILTIN_CMPGTPS,
+ IX86_BUILTIN_CMPGEPS,
+ IX86_BUILTIN_CMPNEQPS,
+ IX86_BUILTIN_CMPNLTPS,
+ IX86_BUILTIN_CMPNLEPS,
+ IX86_BUILTIN_CMPNGTPS,
+ IX86_BUILTIN_CMPNGEPS,
+ IX86_BUILTIN_CMPORDPS,
+ IX86_BUILTIN_CMPUNORDPS,
+ IX86_BUILTIN_CMPEQSS,
+ IX86_BUILTIN_CMPLTSS,
+ IX86_BUILTIN_CMPLESS,
+ IX86_BUILTIN_CMPNEQSS,
+ IX86_BUILTIN_CMPNLTSS,
+ IX86_BUILTIN_CMPNLESS,
+ IX86_BUILTIN_CMPNGTSS,
+ IX86_BUILTIN_CMPNGESS,
+ IX86_BUILTIN_CMPORDSS,
+ IX86_BUILTIN_CMPUNORDSS,
+
+ IX86_BUILTIN_COMIEQSS,
+ IX86_BUILTIN_COMILTSS,
+ IX86_BUILTIN_COMILESS,
+ IX86_BUILTIN_COMIGTSS,
+ IX86_BUILTIN_COMIGESS,
+ IX86_BUILTIN_COMINEQSS,
+ IX86_BUILTIN_UCOMIEQSS,
+ IX86_BUILTIN_UCOMILTSS,
+ IX86_BUILTIN_UCOMILESS,
+ IX86_BUILTIN_UCOMIGTSS,
+ IX86_BUILTIN_UCOMIGESS,
+ IX86_BUILTIN_UCOMINEQSS,
+
+ IX86_BUILTIN_CVTPI2PS,
+ IX86_BUILTIN_CVTPS2PI,
+ IX86_BUILTIN_CVTSI2SS,
+ IX86_BUILTIN_CVTSI642SS,
+ IX86_BUILTIN_CVTSS2SI,
+ IX86_BUILTIN_CVTSS2SI64,
+ IX86_BUILTIN_CVTTPS2PI,
+ IX86_BUILTIN_CVTTSS2SI,
+ IX86_BUILTIN_CVTTSS2SI64,
+
+ IX86_BUILTIN_MAXPS,
+ IX86_BUILTIN_MAXSS,
+ IX86_BUILTIN_MINPS,
+ IX86_BUILTIN_MINSS,
+
+ IX86_BUILTIN_LOADUPS,
+ IX86_BUILTIN_STOREUPS,
+ IX86_BUILTIN_MOVSS,
+
+ IX86_BUILTIN_MOVHLPS,
+ IX86_BUILTIN_MOVLHPS,
+ IX86_BUILTIN_LOADHPS,
+ IX86_BUILTIN_LOADLPS,
+ IX86_BUILTIN_STOREHPS,
+ IX86_BUILTIN_STORELPS,
+
+ IX86_BUILTIN_MASKMOVQ,
+ IX86_BUILTIN_MOVMSKPS,
+ IX86_BUILTIN_PMOVMSKB,
+
+ IX86_BUILTIN_MOVNTPS,
+ IX86_BUILTIN_MOVNTQ,
+
+ IX86_BUILTIN_LOADDQU,
+ IX86_BUILTIN_STOREDQU,
+
+ IX86_BUILTIN_PACKSSWB,
+ IX86_BUILTIN_PACKSSDW,
+ IX86_BUILTIN_PACKUSWB,
+
+ IX86_BUILTIN_PADDB,
+ IX86_BUILTIN_PADDW,
+ IX86_BUILTIN_PADDD,
+ IX86_BUILTIN_PADDQ,
+ IX86_BUILTIN_PADDSB,
+ IX86_BUILTIN_PADDSW,
+ IX86_BUILTIN_PADDUSB,
+ IX86_BUILTIN_PADDUSW,
+ IX86_BUILTIN_PSUBB,
+ IX86_BUILTIN_PSUBW,
+ IX86_BUILTIN_PSUBD,
+ IX86_BUILTIN_PSUBQ,
+ IX86_BUILTIN_PSUBSB,
+ IX86_BUILTIN_PSUBSW,
+ IX86_BUILTIN_PSUBUSB,
+ IX86_BUILTIN_PSUBUSW,
+
+ IX86_BUILTIN_PAND,
+ IX86_BUILTIN_PANDN,
+ IX86_BUILTIN_POR,
+ IX86_BUILTIN_PXOR,
+
+ IX86_BUILTIN_PAVGB,
+ IX86_BUILTIN_PAVGW,
+
+ IX86_BUILTIN_PCMPEQB,
+ IX86_BUILTIN_PCMPEQW,
+ IX86_BUILTIN_PCMPEQD,
+ IX86_BUILTIN_PCMPGTB,
+ IX86_BUILTIN_PCMPGTW,
+ IX86_BUILTIN_PCMPGTD,
+
+ IX86_BUILTIN_PMADDWD,
+
+ IX86_BUILTIN_PMAXSW,
+ IX86_BUILTIN_PMAXUB,
+ IX86_BUILTIN_PMINSW,
+ IX86_BUILTIN_PMINUB,
+
+ IX86_BUILTIN_PMULHUW,
+ IX86_BUILTIN_PMULHW,
+ IX86_BUILTIN_PMULLW,
+
+ IX86_BUILTIN_PSADBW,
+ IX86_BUILTIN_PSHUFW,
+
+ IX86_BUILTIN_PSLLW,
+ IX86_BUILTIN_PSLLD,
+ IX86_BUILTIN_PSLLQ,
+ IX86_BUILTIN_PSRAW,
+ IX86_BUILTIN_PSRAD,
+ IX86_BUILTIN_PSRLW,
+ IX86_BUILTIN_PSRLD,
+ IX86_BUILTIN_PSRLQ,
+ IX86_BUILTIN_PSLLWI,
+ IX86_BUILTIN_PSLLDI,
+ IX86_BUILTIN_PSLLQI,
+ IX86_BUILTIN_PSRAWI,
+ IX86_BUILTIN_PSRADI,
+ IX86_BUILTIN_PSRLWI,
+ IX86_BUILTIN_PSRLDI,
+ IX86_BUILTIN_PSRLQI,
+
+ IX86_BUILTIN_PUNPCKHBW,
+ IX86_BUILTIN_PUNPCKHWD,
+ IX86_BUILTIN_PUNPCKHDQ,
+ IX86_BUILTIN_PUNPCKLBW,
+ IX86_BUILTIN_PUNPCKLWD,
+ IX86_BUILTIN_PUNPCKLDQ,
+
+ IX86_BUILTIN_SHUFPS,
+
+ IX86_BUILTIN_RCPPS,
+ IX86_BUILTIN_RCPSS,
+ IX86_BUILTIN_RSQRTPS,
+ IX86_BUILTIN_RSQRTPS_NR,
+ IX86_BUILTIN_RSQRTSS,
+ IX86_BUILTIN_RSQRTF,
+ IX86_BUILTIN_SQRTPS,
+ IX86_BUILTIN_SQRTPS_NR,
+ IX86_BUILTIN_SQRTSS,
+
+ IX86_BUILTIN_UNPCKHPS,
+ IX86_BUILTIN_UNPCKLPS,
+
+ IX86_BUILTIN_ANDPS,
+ IX86_BUILTIN_ANDNPS,
+ IX86_BUILTIN_ORPS,
+ IX86_BUILTIN_XORPS,
+
+ IX86_BUILTIN_EMMS,
+ IX86_BUILTIN_LDMXCSR,
+ IX86_BUILTIN_STMXCSR,
+ IX86_BUILTIN_SFENCE,
+
+ /* 3DNow! Original */
+ IX86_BUILTIN_FEMMS,
+ IX86_BUILTIN_PAVGUSB,
+ IX86_BUILTIN_PF2ID,
+ IX86_BUILTIN_PFACC,
+ IX86_BUILTIN_PFADD,
+ IX86_BUILTIN_PFCMPEQ,
+ IX86_BUILTIN_PFCMPGE,
+ IX86_BUILTIN_PFCMPGT,
+ IX86_BUILTIN_PFMAX,
+ IX86_BUILTIN_PFMIN,
+ IX86_BUILTIN_PFMUL,
+ IX86_BUILTIN_PFRCP,
+ IX86_BUILTIN_PFRCPIT1,
+ IX86_BUILTIN_PFRCPIT2,
+ IX86_BUILTIN_PFRSQIT1,
+ IX86_BUILTIN_PFRSQRT,
+ IX86_BUILTIN_PFSUB,
+ IX86_BUILTIN_PFSUBR,
+ IX86_BUILTIN_PI2FD,
+ IX86_BUILTIN_PMULHRW,
+
+ /* 3DNow! Athlon Extensions */
+ IX86_BUILTIN_PF2IW,
+ IX86_BUILTIN_PFNACC,
+ IX86_BUILTIN_PFPNACC,
+ IX86_BUILTIN_PI2FW,
+ IX86_BUILTIN_PSWAPDSI,
+ IX86_BUILTIN_PSWAPDSF,
+
+ /* SSE2 */
+ IX86_BUILTIN_ADDPD,
+ IX86_BUILTIN_ADDSD,
+ IX86_BUILTIN_DIVPD,
+ IX86_BUILTIN_DIVSD,
+ IX86_BUILTIN_MULPD,
+ IX86_BUILTIN_MULSD,
+ IX86_BUILTIN_SUBPD,
+ IX86_BUILTIN_SUBSD,
+
+ IX86_BUILTIN_CMPEQPD,
+ IX86_BUILTIN_CMPLTPD,
+ IX86_BUILTIN_CMPLEPD,
+ IX86_BUILTIN_CMPGTPD,
+ IX86_BUILTIN_CMPGEPD,
+ IX86_BUILTIN_CMPNEQPD,
+ IX86_BUILTIN_CMPNLTPD,
+ IX86_BUILTIN_CMPNLEPD,
+ IX86_BUILTIN_CMPNGTPD,
+ IX86_BUILTIN_CMPNGEPD,
+ IX86_BUILTIN_CMPORDPD,
+ IX86_BUILTIN_CMPUNORDPD,
+ IX86_BUILTIN_CMPEQSD,
+ IX86_BUILTIN_CMPLTSD,
+ IX86_BUILTIN_CMPLESD,
+ IX86_BUILTIN_CMPNEQSD,
+ IX86_BUILTIN_CMPNLTSD,
+ IX86_BUILTIN_CMPNLESD,
+ IX86_BUILTIN_CMPORDSD,
+ IX86_BUILTIN_CMPUNORDSD,
+
+ IX86_BUILTIN_COMIEQSD,
+ IX86_BUILTIN_COMILTSD,
+ IX86_BUILTIN_COMILESD,
+ IX86_BUILTIN_COMIGTSD,
+ IX86_BUILTIN_COMIGESD,
+ IX86_BUILTIN_COMINEQSD,
+ IX86_BUILTIN_UCOMIEQSD,
+ IX86_BUILTIN_UCOMILTSD,
+ IX86_BUILTIN_UCOMILESD,
+ IX86_BUILTIN_UCOMIGTSD,
+ IX86_BUILTIN_UCOMIGESD,
+ IX86_BUILTIN_UCOMINEQSD,
+
+ IX86_BUILTIN_MAXPD,
+ IX86_BUILTIN_MAXSD,
+ IX86_BUILTIN_MINPD,
+ IX86_BUILTIN_MINSD,
+
+ IX86_BUILTIN_ANDPD,
+ IX86_BUILTIN_ANDNPD,
+ IX86_BUILTIN_ORPD,
+ IX86_BUILTIN_XORPD,
+
+ IX86_BUILTIN_SQRTPD,
+ IX86_BUILTIN_SQRTSD,
+
+ IX86_BUILTIN_UNPCKHPD,
+ IX86_BUILTIN_UNPCKLPD,
+
+ IX86_BUILTIN_SHUFPD,
+
+ IX86_BUILTIN_LOADUPD,
+ IX86_BUILTIN_STOREUPD,
+ IX86_BUILTIN_MOVSD,
+
+ IX86_BUILTIN_LOADHPD,
+ IX86_BUILTIN_LOADLPD,
+
+ IX86_BUILTIN_CVTDQ2PD,
+ IX86_BUILTIN_CVTDQ2PS,
+
+ IX86_BUILTIN_CVTPD2DQ,
+ IX86_BUILTIN_CVTPD2PI,
+ IX86_BUILTIN_CVTPD2PS,
+ IX86_BUILTIN_CVTTPD2DQ,
+ IX86_BUILTIN_CVTTPD2PI,
+
+ IX86_BUILTIN_CVTPI2PD,
+ IX86_BUILTIN_CVTSI2SD,
+ IX86_BUILTIN_CVTSI642SD,
+
+ IX86_BUILTIN_CVTSD2SI,
+ IX86_BUILTIN_CVTSD2SI64,
+ IX86_BUILTIN_CVTSD2SS,
+ IX86_BUILTIN_CVTSS2SD,
+ IX86_BUILTIN_CVTTSD2SI,
+ IX86_BUILTIN_CVTTSD2SI64,
+
+ IX86_BUILTIN_CVTPS2DQ,
+ IX86_BUILTIN_CVTPS2PD,
+ IX86_BUILTIN_CVTTPS2DQ,
+
+ IX86_BUILTIN_MOVNTI,
+ IX86_BUILTIN_MOVNTPD,
+ IX86_BUILTIN_MOVNTDQ,
+
+ IX86_BUILTIN_MOVQ128,
+
+ /* SSE2 MMX */
+ IX86_BUILTIN_MASKMOVDQU,
+ IX86_BUILTIN_MOVMSKPD,
+ IX86_BUILTIN_PMOVMSKB128,
+
+ IX86_BUILTIN_PACKSSWB128,
+ IX86_BUILTIN_PACKSSDW128,
+ IX86_BUILTIN_PACKUSWB128,
+
+ IX86_BUILTIN_PADDB128,
+ IX86_BUILTIN_PADDW128,
+ IX86_BUILTIN_PADDD128,
+ IX86_BUILTIN_PADDQ128,
+ IX86_BUILTIN_PADDSB128,
+ IX86_BUILTIN_PADDSW128,
+ IX86_BUILTIN_PADDUSB128,
+ IX86_BUILTIN_PADDUSW128,
+ IX86_BUILTIN_PSUBB128,
+ IX86_BUILTIN_PSUBW128,
+ IX86_BUILTIN_PSUBD128,
+ IX86_BUILTIN_PSUBQ128,
+ IX86_BUILTIN_PSUBSB128,
+ IX86_BUILTIN_PSUBSW128,
+ IX86_BUILTIN_PSUBUSB128,
+ IX86_BUILTIN_PSUBUSW128,
+
+ IX86_BUILTIN_PAND128,
+ IX86_BUILTIN_PANDN128,
+ IX86_BUILTIN_POR128,
+ IX86_BUILTIN_PXOR128,
+
+ IX86_BUILTIN_PAVGB128,
+ IX86_BUILTIN_PAVGW128,
+
+ IX86_BUILTIN_PCMPEQB128,
+ IX86_BUILTIN_PCMPEQW128,
+ IX86_BUILTIN_PCMPEQD128,
+ IX86_BUILTIN_PCMPGTB128,
+ IX86_BUILTIN_PCMPGTW128,
+ IX86_BUILTIN_PCMPGTD128,
+
+ IX86_BUILTIN_PMADDWD128,
+
+ IX86_BUILTIN_PMAXSW128,
+ IX86_BUILTIN_PMAXUB128,
+ IX86_BUILTIN_PMINSW128,
+ IX86_BUILTIN_PMINUB128,
+
+ IX86_BUILTIN_PMULUDQ,
+ IX86_BUILTIN_PMULUDQ128,
+ IX86_BUILTIN_PMULHUW128,
+ IX86_BUILTIN_PMULHW128,
+ IX86_BUILTIN_PMULLW128,
+
+ IX86_BUILTIN_PSADBW128,
+ IX86_BUILTIN_PSHUFHW,
+ IX86_BUILTIN_PSHUFLW,
+ IX86_BUILTIN_PSHUFD,
+
+ IX86_BUILTIN_PSLLDQI128,
+ IX86_BUILTIN_PSLLWI128,
+ IX86_BUILTIN_PSLLDI128,
+ IX86_BUILTIN_PSLLQI128,
+ IX86_BUILTIN_PSRAWI128,
+ IX86_BUILTIN_PSRADI128,
+ IX86_BUILTIN_PSRLDQI128,
+ IX86_BUILTIN_PSRLWI128,
+ IX86_BUILTIN_PSRLDI128,
+ IX86_BUILTIN_PSRLQI128,
+
+ IX86_BUILTIN_PSLLDQ128,
+ IX86_BUILTIN_PSLLW128,
+ IX86_BUILTIN_PSLLD128,
+ IX86_BUILTIN_PSLLQ128,
+ IX86_BUILTIN_PSRAW128,
+ IX86_BUILTIN_PSRAD128,
+ IX86_BUILTIN_PSRLW128,
+ IX86_BUILTIN_PSRLD128,
+ IX86_BUILTIN_PSRLQ128,
+
+ IX86_BUILTIN_PUNPCKHBW128,
+ IX86_BUILTIN_PUNPCKHWD128,
+ IX86_BUILTIN_PUNPCKHDQ128,
+ IX86_BUILTIN_PUNPCKHQDQ128,
+ IX86_BUILTIN_PUNPCKLBW128,
+ IX86_BUILTIN_PUNPCKLWD128,
+ IX86_BUILTIN_PUNPCKLDQ128,
+ IX86_BUILTIN_PUNPCKLQDQ128,
+
+ IX86_BUILTIN_CLFLUSH,
+ IX86_BUILTIN_MFENCE,
+ IX86_BUILTIN_LFENCE,
+
+ /* SSE3. */
+ IX86_BUILTIN_ADDSUBPS,
+ IX86_BUILTIN_HADDPS,
+ IX86_BUILTIN_HSUBPS,
+ IX86_BUILTIN_MOVSHDUP,
+ IX86_BUILTIN_MOVSLDUP,
+ IX86_BUILTIN_ADDSUBPD,
+ IX86_BUILTIN_HADDPD,
+ IX86_BUILTIN_HSUBPD,
+ IX86_BUILTIN_LDDQU,
+
+ IX86_BUILTIN_MONITOR,
+ IX86_BUILTIN_MWAIT,
+
+ /* SSSE3. */
+ IX86_BUILTIN_PHADDW,
+ IX86_BUILTIN_PHADDD,
+ IX86_BUILTIN_PHADDSW,
+ IX86_BUILTIN_PHSUBW,
+ IX86_BUILTIN_PHSUBD,
+ IX86_BUILTIN_PHSUBSW,
+ IX86_BUILTIN_PMADDUBSW,
+ IX86_BUILTIN_PMULHRSW,
+ IX86_BUILTIN_PSHUFB,
+ IX86_BUILTIN_PSIGNB,
+ IX86_BUILTIN_PSIGNW,
+ IX86_BUILTIN_PSIGND,
+ IX86_BUILTIN_PALIGNR,
+ IX86_BUILTIN_PABSB,
+ IX86_BUILTIN_PABSW,
+ IX86_BUILTIN_PABSD,
+
+ IX86_BUILTIN_PHADDW128,
+ IX86_BUILTIN_PHADDD128,
+ IX86_BUILTIN_PHADDSW128,
+ IX86_BUILTIN_PHSUBW128,
+ IX86_BUILTIN_PHSUBD128,
+ IX86_BUILTIN_PHSUBSW128,
+ IX86_BUILTIN_PMADDUBSW128,
+ IX86_BUILTIN_PMULHRSW128,
+ IX86_BUILTIN_PSHUFB128,
+ IX86_BUILTIN_PSIGNB128,
+ IX86_BUILTIN_PSIGNW128,
+ IX86_BUILTIN_PSIGND128,
+ IX86_BUILTIN_PALIGNR128,
+ IX86_BUILTIN_PABSB128,
+ IX86_BUILTIN_PABSW128,
+ IX86_BUILTIN_PABSD128,
+
+ /* AMDFAM10 - SSE4A New Instructions. */
+ IX86_BUILTIN_MOVNTSD,
+ IX86_BUILTIN_MOVNTSS,
+ IX86_BUILTIN_EXTRQI,
+ IX86_BUILTIN_EXTRQ,
+ IX86_BUILTIN_INSERTQI,
+ IX86_BUILTIN_INSERTQ,
+
+ /* SSE4.1. */
+ IX86_BUILTIN_BLENDPD,
+ IX86_BUILTIN_BLENDPS,
+ IX86_BUILTIN_BLENDVPD,
+ IX86_BUILTIN_BLENDVPS,
+ IX86_BUILTIN_PBLENDVB128,
+ IX86_BUILTIN_PBLENDW128,
+
+ IX86_BUILTIN_DPPD,
+ IX86_BUILTIN_DPPS,
+
+ IX86_BUILTIN_INSERTPS128,
+
+ IX86_BUILTIN_MOVNTDQA,
+ IX86_BUILTIN_MPSADBW128,
+ IX86_BUILTIN_PACKUSDW128,
+ IX86_BUILTIN_PCMPEQQ,
+ IX86_BUILTIN_PHMINPOSUW128,
+
+ IX86_BUILTIN_PMAXSB128,
+ IX86_BUILTIN_PMAXSD128,
+ IX86_BUILTIN_PMAXUD128,
+ IX86_BUILTIN_PMAXUW128,
+
+ IX86_BUILTIN_PMINSB128,
+ IX86_BUILTIN_PMINSD128,
+ IX86_BUILTIN_PMINUD128,
+ IX86_BUILTIN_PMINUW128,
+
+ IX86_BUILTIN_PMOVSXBW128,
+ IX86_BUILTIN_PMOVSXBD128,
+ IX86_BUILTIN_PMOVSXBQ128,
+ IX86_BUILTIN_PMOVSXWD128,
+ IX86_BUILTIN_PMOVSXWQ128,
+ IX86_BUILTIN_PMOVSXDQ128,
+
+ IX86_BUILTIN_PMOVZXBW128,
+ IX86_BUILTIN_PMOVZXBD128,
+ IX86_BUILTIN_PMOVZXBQ128,
+ IX86_BUILTIN_PMOVZXWD128,
+ IX86_BUILTIN_PMOVZXWQ128,
+ IX86_BUILTIN_PMOVZXDQ128,
+
+ IX86_BUILTIN_PMULDQ128,
+ IX86_BUILTIN_PMULLD128,
+
+ IX86_BUILTIN_ROUNDPD,
+ IX86_BUILTIN_ROUNDPS,
+ IX86_BUILTIN_ROUNDSD,
+ IX86_BUILTIN_ROUNDSS,
+
+ IX86_BUILTIN_PTESTZ,
+ IX86_BUILTIN_PTESTC,
+ IX86_BUILTIN_PTESTNZC,
+
+ IX86_BUILTIN_VEC_INIT_V2SI,
+ IX86_BUILTIN_VEC_INIT_V4HI,
+ IX86_BUILTIN_VEC_INIT_V8QI,
+ IX86_BUILTIN_VEC_EXT_V2DF,
+ IX86_BUILTIN_VEC_EXT_V2DI,
+ IX86_BUILTIN_VEC_EXT_V4SF,
+ IX86_BUILTIN_VEC_EXT_V4SI,
+ IX86_BUILTIN_VEC_EXT_V8HI,
+ IX86_BUILTIN_VEC_EXT_V2SI,
+ IX86_BUILTIN_VEC_EXT_V4HI,
+ IX86_BUILTIN_VEC_EXT_V16QI,
+ IX86_BUILTIN_VEC_SET_V2DI,
+ IX86_BUILTIN_VEC_SET_V4SF,
+ IX86_BUILTIN_VEC_SET_V4SI,
+ IX86_BUILTIN_VEC_SET_V8HI,
+ IX86_BUILTIN_VEC_SET_V4HI,
+ IX86_BUILTIN_VEC_SET_V16QI,
+
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
+ /* SSE4.2. */
+ IX86_BUILTIN_CRC32QI,
+ IX86_BUILTIN_CRC32HI,
+ IX86_BUILTIN_CRC32SI,
+ IX86_BUILTIN_CRC32DI,
+
+ IX86_BUILTIN_PCMPESTRI128,
+ IX86_BUILTIN_PCMPESTRM128,
+ IX86_BUILTIN_PCMPESTRA128,
+ IX86_BUILTIN_PCMPESTRC128,
+ IX86_BUILTIN_PCMPESTRO128,
+ IX86_BUILTIN_PCMPESTRS128,
+ IX86_BUILTIN_PCMPESTRZ128,
+ IX86_BUILTIN_PCMPISTRI128,
+ IX86_BUILTIN_PCMPISTRM128,
+ IX86_BUILTIN_PCMPISTRA128,
+ IX86_BUILTIN_PCMPISTRC128,
+ IX86_BUILTIN_PCMPISTRO128,
+ IX86_BUILTIN_PCMPISTRS128,
+ IX86_BUILTIN_PCMPISTRZ128,
+
+ IX86_BUILTIN_PCMPGTQ,
+
+ /* AES instructions */
+ IX86_BUILTIN_AESENC128,
+ IX86_BUILTIN_AESENCLAST128,
+ IX86_BUILTIN_AESDEC128,
+ IX86_BUILTIN_AESDECLAST128,
+ IX86_BUILTIN_AESIMC128,
+ IX86_BUILTIN_AESKEYGENASSIST128,
+
+ /* PCLMUL instruction */
+ IX86_BUILTIN_PCLMULQDQ128,
+
+ /* AVX */
+ IX86_BUILTIN_ADDPD256,
+ IX86_BUILTIN_ADDPS256,
+ IX86_BUILTIN_ADDSUBPD256,
+ IX86_BUILTIN_ADDSUBPS256,
+ IX86_BUILTIN_ANDPD256,
+ IX86_BUILTIN_ANDPS256,
+ IX86_BUILTIN_ANDNPD256,
+ IX86_BUILTIN_ANDNPS256,
+ IX86_BUILTIN_BLENDPD256,
+ IX86_BUILTIN_BLENDPS256,
+ IX86_BUILTIN_BLENDVPD256,
+ IX86_BUILTIN_BLENDVPS256,
+ IX86_BUILTIN_DIVPD256,
+ IX86_BUILTIN_DIVPS256,
+ IX86_BUILTIN_DPPS256,
+ IX86_BUILTIN_HADDPD256,
+ IX86_BUILTIN_HADDPS256,
+ IX86_BUILTIN_HSUBPD256,
+ IX86_BUILTIN_HSUBPS256,
+ IX86_BUILTIN_MAXPD256,
+ IX86_BUILTIN_MAXPS256,
+ IX86_BUILTIN_MINPD256,
+ IX86_BUILTIN_MINPS256,
+ IX86_BUILTIN_MULPD256,
+ IX86_BUILTIN_MULPS256,
+ IX86_BUILTIN_ORPD256,
+ IX86_BUILTIN_ORPS256,
+ IX86_BUILTIN_SHUFPD256,
+ IX86_BUILTIN_SHUFPS256,
+ IX86_BUILTIN_SUBPD256,
+ IX86_BUILTIN_SUBPS256,
+ IX86_BUILTIN_XORPD256,
+ IX86_BUILTIN_XORPS256,
+ IX86_BUILTIN_CMPSD,
+ IX86_BUILTIN_CMPSS,
+ IX86_BUILTIN_CMPPD,
+ IX86_BUILTIN_CMPPS,
+ IX86_BUILTIN_CMPPD256,
+ IX86_BUILTIN_CMPPS256,
+ IX86_BUILTIN_CVTDQ2PD256,
+ IX86_BUILTIN_CVTDQ2PS256,
+ IX86_BUILTIN_CVTPD2PS256,
+ IX86_BUILTIN_CVTPS2DQ256,
+ IX86_BUILTIN_CVTPS2PD256,
+ IX86_BUILTIN_CVTTPD2DQ256,
+ IX86_BUILTIN_CVTPD2DQ256,
+ IX86_BUILTIN_CVTTPS2DQ256,
+ IX86_BUILTIN_EXTRACTF128PD256,
+ IX86_BUILTIN_EXTRACTF128PS256,
+ IX86_BUILTIN_EXTRACTF128SI256,
+ IX86_BUILTIN_VZEROALL,
+ IX86_BUILTIN_VZEROUPPER,
+ IX86_BUILTIN_VZEROUPPER_REX64,
+ IX86_BUILTIN_VPERMILVARPD,
+ IX86_BUILTIN_VPERMILVARPS,
+ IX86_BUILTIN_VPERMILVARPD256,
+ IX86_BUILTIN_VPERMILVARPS256,
+ IX86_BUILTIN_VPERMILPD,
+ IX86_BUILTIN_VPERMILPS,
+ IX86_BUILTIN_VPERMILPD256,
+ IX86_BUILTIN_VPERMILPS256,
+ IX86_BUILTIN_VPERM2F128PD256,
+ IX86_BUILTIN_VPERM2F128PS256,
+ IX86_BUILTIN_VPERM2F128SI256,
+ IX86_BUILTIN_VBROADCASTSS,
+ IX86_BUILTIN_VBROADCASTSD256,
+ IX86_BUILTIN_VBROADCASTSS256,
+ IX86_BUILTIN_VBROADCASTPD256,
+ IX86_BUILTIN_VBROADCASTPS256,
+ IX86_BUILTIN_VINSERTF128PD256,
+ IX86_BUILTIN_VINSERTF128PS256,
+ IX86_BUILTIN_VINSERTF128SI256,
+ IX86_BUILTIN_LOADUPD256,
+ IX86_BUILTIN_LOADUPS256,
+ IX86_BUILTIN_STOREUPD256,
+ IX86_BUILTIN_STOREUPS256,
+ IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_MOVNTDQ256,
+ IX86_BUILTIN_MOVNTPD256,
+ IX86_BUILTIN_MOVNTPS256,
+ IX86_BUILTIN_LOADDQU256,
+ IX86_BUILTIN_STOREDQU256,
+ IX86_BUILTIN_MASKLOADPD,
+ IX86_BUILTIN_MASKLOADPS,
+ IX86_BUILTIN_MASKSTOREPD,
+ IX86_BUILTIN_MASKSTOREPS,
+ IX86_BUILTIN_MASKLOADPD256,
+ IX86_BUILTIN_MASKLOADPS256,
+ IX86_BUILTIN_MASKSTOREPD256,
+ IX86_BUILTIN_MASKSTOREPS256,
+ IX86_BUILTIN_MOVSHDUP256,
+ IX86_BUILTIN_MOVSLDUP256,
+ IX86_BUILTIN_MOVDDUP256,
+
+ IX86_BUILTIN_SQRTPD256,
+ IX86_BUILTIN_SQRTPS256,
+ IX86_BUILTIN_SQRTPS_NR256,
+ IX86_BUILTIN_RSQRTPS256,
+ IX86_BUILTIN_RSQRTPS_NR256,
+
+ IX86_BUILTIN_RCPPS256,
+
+ IX86_BUILTIN_ROUNDPD256,
+ IX86_BUILTIN_ROUNDPS256,
+
+ IX86_BUILTIN_UNPCKHPD256,
+ IX86_BUILTIN_UNPCKLPD256,
+ IX86_BUILTIN_UNPCKHPS256,
+ IX86_BUILTIN_UNPCKLPS256,
+
+ IX86_BUILTIN_SI256_SI,
+ IX86_BUILTIN_PS256_PS,
+ IX86_BUILTIN_PD256_PD,
+ IX86_BUILTIN_SI_SI256,
+ IX86_BUILTIN_PS_PS256,
+ IX86_BUILTIN_PD_PD256,
+
+ IX86_BUILTIN_VTESTZPD,
+ IX86_BUILTIN_VTESTCPD,
+ IX86_BUILTIN_VTESTNZCPD,
+ IX86_BUILTIN_VTESTZPS,
+ IX86_BUILTIN_VTESTCPS,
+ IX86_BUILTIN_VTESTNZCPS,
+ IX86_BUILTIN_VTESTZPD256,
+ IX86_BUILTIN_VTESTCPD256,
+ IX86_BUILTIN_VTESTNZCPD256,
+ IX86_BUILTIN_VTESTZPS256,
+ IX86_BUILTIN_VTESTCPS256,
+ IX86_BUILTIN_VTESTNZCPS256,
+ IX86_BUILTIN_PTESTZ256,
+ IX86_BUILTIN_PTESTC256,
+ IX86_BUILTIN_PTESTNZC256,
+
+ IX86_BUILTIN_MOVMSKPD256,
+ IX86_BUILTIN_MOVMSKPS256,
+
+ /* TFmode support builtins. */
+ IX86_BUILTIN_INFQ,
+ IX86_BUILTIN_FABSQ,
+ IX86_BUILTIN_COPYSIGNQ,
+
+ /* SSE5 instructions */
+ IX86_BUILTIN_FMADDSS,
+ IX86_BUILTIN_FMADDSD,
+ IX86_BUILTIN_FMADDPS,
+ IX86_BUILTIN_FMADDPD,
+ IX86_BUILTIN_FMSUBSS,
+ IX86_BUILTIN_FMSUBSD,
+ IX86_BUILTIN_FMSUBPS,
+ IX86_BUILTIN_FMSUBPD,
+ IX86_BUILTIN_FNMADDSS,
+ IX86_BUILTIN_FNMADDSD,
+ IX86_BUILTIN_FNMADDPS,
+ IX86_BUILTIN_FNMADDPD,
+ IX86_BUILTIN_FNMSUBSS,
+ IX86_BUILTIN_FNMSUBSD,
+ IX86_BUILTIN_FNMSUBPS,
+ IX86_BUILTIN_FNMSUBPD,
+ IX86_BUILTIN_PCMOV,
+ IX86_BUILTIN_PCMOV_V2DI,
+ IX86_BUILTIN_PCMOV_V4SI,
+ IX86_BUILTIN_PCMOV_V8HI,
+ IX86_BUILTIN_PCMOV_V16QI,
+ IX86_BUILTIN_PCMOV_V4SF,
+ IX86_BUILTIN_PCMOV_V2DF,
+ IX86_BUILTIN_PPERM,
+ IX86_BUILTIN_PERMPS,
+ IX86_BUILTIN_PERMPD,
+ IX86_BUILTIN_PMACSSWW,
+ IX86_BUILTIN_PMACSWW,
+ IX86_BUILTIN_PMACSSWD,
+ IX86_BUILTIN_PMACSWD,
+ IX86_BUILTIN_PMACSSDD,
+ IX86_BUILTIN_PMACSDD,
+ IX86_BUILTIN_PMACSSDQL,
+ IX86_BUILTIN_PMACSSDQH,
+ IX86_BUILTIN_PMACSDQL,
+ IX86_BUILTIN_PMACSDQH,
+ IX86_BUILTIN_PMADCSSWD,
+ IX86_BUILTIN_PMADCSWD,
+ IX86_BUILTIN_PHADDBW,
+ IX86_BUILTIN_PHADDBD,
+ IX86_BUILTIN_PHADDBQ,
+ IX86_BUILTIN_PHADDWD,
+ IX86_BUILTIN_PHADDWQ,
+ IX86_BUILTIN_PHADDDQ,
+ IX86_BUILTIN_PHADDUBW,
+ IX86_BUILTIN_PHADDUBD,
+ IX86_BUILTIN_PHADDUBQ,
+ IX86_BUILTIN_PHADDUWD,
+ IX86_BUILTIN_PHADDUWQ,
+ IX86_BUILTIN_PHADDUDQ,
+ IX86_BUILTIN_PHSUBBW,
+ IX86_BUILTIN_PHSUBWD,
+ IX86_BUILTIN_PHSUBDQ,
+ IX86_BUILTIN_PROTB,
+ IX86_BUILTIN_PROTW,
+ IX86_BUILTIN_PROTD,
+ IX86_BUILTIN_PROTQ,
+ IX86_BUILTIN_PROTB_IMM,
+ IX86_BUILTIN_PROTW_IMM,
+ IX86_BUILTIN_PROTD_IMM,
+ IX86_BUILTIN_PROTQ_IMM,
+ IX86_BUILTIN_PSHLB,
+ IX86_BUILTIN_PSHLW,
+ IX86_BUILTIN_PSHLD,
+ IX86_BUILTIN_PSHLQ,
+ IX86_BUILTIN_PSHAB,
+ IX86_BUILTIN_PSHAW,
+ IX86_BUILTIN_PSHAD,
+ IX86_BUILTIN_PSHAQ,
+ IX86_BUILTIN_FRCZSS,
+ IX86_BUILTIN_FRCZSD,
+ IX86_BUILTIN_FRCZPS,
+ IX86_BUILTIN_FRCZPD,
+ IX86_BUILTIN_CVTPH2PS,
+ IX86_BUILTIN_CVTPS2PH,
+
+ IX86_BUILTIN_COMEQSS,
+ IX86_BUILTIN_COMNESS,
+ IX86_BUILTIN_COMLTSS,
+ IX86_BUILTIN_COMLESS,
+ IX86_BUILTIN_COMGTSS,
+ IX86_BUILTIN_COMGESS,
+ IX86_BUILTIN_COMUEQSS,
+ IX86_BUILTIN_COMUNESS,
+ IX86_BUILTIN_COMULTSS,
+ IX86_BUILTIN_COMULESS,
+ IX86_BUILTIN_COMUGTSS,
+ IX86_BUILTIN_COMUGESS,
+ IX86_BUILTIN_COMORDSS,
+ IX86_BUILTIN_COMUNORDSS,
+ IX86_BUILTIN_COMFALSESS,
+ IX86_BUILTIN_COMTRUESS,
+
+ IX86_BUILTIN_COMEQSD,
+ IX86_BUILTIN_COMNESD,
+ IX86_BUILTIN_COMLTSD,
+ IX86_BUILTIN_COMLESD,
+ IX86_BUILTIN_COMGTSD,
+ IX86_BUILTIN_COMGESD,
+ IX86_BUILTIN_COMUEQSD,
+ IX86_BUILTIN_COMUNESD,
+ IX86_BUILTIN_COMULTSD,
+ IX86_BUILTIN_COMULESD,
+ IX86_BUILTIN_COMUGTSD,
+ IX86_BUILTIN_COMUGESD,
+ IX86_BUILTIN_COMORDSD,
+ IX86_BUILTIN_COMUNORDSD,
+ IX86_BUILTIN_COMFALSESD,
+ IX86_BUILTIN_COMTRUESD,
+
+ IX86_BUILTIN_COMEQPS,
+ IX86_BUILTIN_COMNEPS,
+ IX86_BUILTIN_COMLTPS,
+ IX86_BUILTIN_COMLEPS,
+ IX86_BUILTIN_COMGTPS,
+ IX86_BUILTIN_COMGEPS,
+ IX86_BUILTIN_COMUEQPS,
+ IX86_BUILTIN_COMUNEPS,
+ IX86_BUILTIN_COMULTPS,
+ IX86_BUILTIN_COMULEPS,
+ IX86_BUILTIN_COMUGTPS,
+ IX86_BUILTIN_COMUGEPS,
+ IX86_BUILTIN_COMORDPS,
+ IX86_BUILTIN_COMUNORDPS,
+ IX86_BUILTIN_COMFALSEPS,
+ IX86_BUILTIN_COMTRUEPS,
+
+ IX86_BUILTIN_COMEQPD,
+ IX86_BUILTIN_COMNEPD,
+ IX86_BUILTIN_COMLTPD,
+ IX86_BUILTIN_COMLEPD,
+ IX86_BUILTIN_COMGTPD,
+ IX86_BUILTIN_COMGEPD,
+ IX86_BUILTIN_COMUEQPD,
+ IX86_BUILTIN_COMUNEPD,
+ IX86_BUILTIN_COMULTPD,
+ IX86_BUILTIN_COMULEPD,
+ IX86_BUILTIN_COMUGTPD,
+ IX86_BUILTIN_COMUGEPD,
+ IX86_BUILTIN_COMORDPD,
+ IX86_BUILTIN_COMUNORDPD,
+ IX86_BUILTIN_COMFALSEPD,
+ IX86_BUILTIN_COMTRUEPD,
+
+ IX86_BUILTIN_PCOMEQUB,
+ IX86_BUILTIN_PCOMNEUB,
+ IX86_BUILTIN_PCOMLTUB,
+ IX86_BUILTIN_PCOMLEUB,
+ IX86_BUILTIN_PCOMGTUB,
+ IX86_BUILTIN_PCOMGEUB,
+ IX86_BUILTIN_PCOMFALSEUB,
+ IX86_BUILTIN_PCOMTRUEUB,
+ IX86_BUILTIN_PCOMEQUW,
+ IX86_BUILTIN_PCOMNEUW,
+ IX86_BUILTIN_PCOMLTUW,
+ IX86_BUILTIN_PCOMLEUW,
+ IX86_BUILTIN_PCOMGTUW,
+ IX86_BUILTIN_PCOMGEUW,
+ IX86_BUILTIN_PCOMFALSEUW,
+ IX86_BUILTIN_PCOMTRUEUW,
+ IX86_BUILTIN_PCOMEQUD,
+ IX86_BUILTIN_PCOMNEUD,
+ IX86_BUILTIN_PCOMLTUD,
+ IX86_BUILTIN_PCOMLEUD,
+ IX86_BUILTIN_PCOMGTUD,
+ IX86_BUILTIN_PCOMGEUD,
+ IX86_BUILTIN_PCOMFALSEUD,
+ IX86_BUILTIN_PCOMTRUEUD,
+ IX86_BUILTIN_PCOMEQUQ,
+ IX86_BUILTIN_PCOMNEUQ,
+ IX86_BUILTIN_PCOMLTUQ,
+ IX86_BUILTIN_PCOMLEUQ,
+ IX86_BUILTIN_PCOMGTUQ,
+ IX86_BUILTIN_PCOMGEUQ,
+ IX86_BUILTIN_PCOMFALSEUQ,
+ IX86_BUILTIN_PCOMTRUEUQ,
+
+ IX86_BUILTIN_PCOMEQB,
+ IX86_BUILTIN_PCOMNEB,
+ IX86_BUILTIN_PCOMLTB,
+ IX86_BUILTIN_PCOMLEB,
+ IX86_BUILTIN_PCOMGTB,
+ IX86_BUILTIN_PCOMGEB,
+ IX86_BUILTIN_PCOMFALSEB,
+ IX86_BUILTIN_PCOMTRUEB,
+ IX86_BUILTIN_PCOMEQW,
+ IX86_BUILTIN_PCOMNEW,
+ IX86_BUILTIN_PCOMLTW,
+ IX86_BUILTIN_PCOMLEW,
+ IX86_BUILTIN_PCOMGTW,
+ IX86_BUILTIN_PCOMGEW,
+ IX86_BUILTIN_PCOMFALSEW,
+ IX86_BUILTIN_PCOMTRUEW,
+ IX86_BUILTIN_PCOMEQD,
+ IX86_BUILTIN_PCOMNED,
+ IX86_BUILTIN_PCOMLTD,
+ IX86_BUILTIN_PCOMLED,
+ IX86_BUILTIN_PCOMGTD,
+ IX86_BUILTIN_PCOMGED,
+ IX86_BUILTIN_PCOMFALSED,
+ IX86_BUILTIN_PCOMTRUED,
+ IX86_BUILTIN_PCOMEQQ,
+ IX86_BUILTIN_PCOMNEQ,
+ IX86_BUILTIN_PCOMLTQ,
+ IX86_BUILTIN_PCOMLEQ,
+ IX86_BUILTIN_PCOMGTQ,
+ IX86_BUILTIN_PCOMGEQ,
+ IX86_BUILTIN_PCOMFALSEQ,
+ IX86_BUILTIN_PCOMTRUEQ,
+
+ IX86_BUILTIN_MAX
+};
+
+/* Table for the ix86 builtin decls. */
+static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
+
+/* Table of all of the builtin functions that are possible with different ISAs
+ but are waiting to be built until a function is declared to use that
+ ISA. */
+struct builtin_isa GTY(())
+{
+ tree type; /* builtin type to use in the declaration */
+ const char *name; /* function name */
+ int isa; /* isa_flags this builtin is defined for */
+ bool const_p; /* true if the declaration is constant */
+};
+
+static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+
+
+/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the
+ * MASK of isa_flags to use in the ix86_builtins_isa array. Store the
+ * function decl in the ix86_builtins array. Return the function decl, or
+ * NULL_TREE if the builtin was not added.
+ *
+ * If the front end has a special hook for builtin functions, delay adding
+ * builtin functions that aren't in the current ISA until the ISA is changed
+ * with function specific optimization. Doing so can save about 300K for the
+ * default compiler. When the builtin is expanded, check at that time whether
+ * it is valid.
+ *
+ * If the front end doesn't have a special hook, record all builtins, even
+ * those not in the current ISA, in case the user uses function specific
+ * options for a different ISA. That way we don't get scope errors if a
+ * builtin is added in the middle of a function scope. */
+
+static inline tree
+def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
+{
+ tree decl = NULL_TREE;
+
+ if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
+ {
+ ix86_builtins_isa[(int) code].isa = mask;
+
+ if ((mask & ix86_isa_flags) != 0
+ || (lang_hooks.builtin_function
+ == lang_hooks.builtin_function_ext_scope))
+ {
+ decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ ix86_builtins[(int) code] = decl;
+ ix86_builtins_isa[(int) code].type = NULL_TREE;
+ }
+ else
+ {
+ ix86_builtins[(int) code] = NULL_TREE;
+ ix86_builtins_isa[(int) code].const_p = false;
+ ix86_builtins_isa[(int) code].type = type;
+ ix86_builtins_isa[(int) code].name = name;
+ }
+ }
+
+ return decl;
+}
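+
+/* Illustrative sketch (editorial addition, not part of the original source):
+ a typical registration of an SSE2 builtin. The type-tree variable
+ "v2df_ftype_v2df_v2df" is a hypothetical name for whatever tree the caller
+ has built for a V2DF (V2DF, V2DF) signature:
+
+ decl = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
+ v2df_ftype_v2df_v2df, IX86_BUILTIN_ADDPD);
+
+ If SSE2 is not in ix86_isa_flags and the deferral conditions above hold,
+ the declaration is only recorded in ix86_builtins_isa and is materialized
+ later by ix86_add_new_builtins. */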
+
+/* Like def_builtin, but also marks the function decl "const". */
+
+static inline tree
+def_builtin_const (int mask, const char *name, tree type,
+ enum ix86_builtins code)
+{
+ tree decl = def_builtin (mask, name, type, code);
+ if (decl)
+ TREE_READONLY (decl) = 1;
+ else
+ ix86_builtins_isa[(int) code].const_p = true;
+
+ return decl;
+}
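+
+/* Editorial illustration (the type variable is hypothetical): a pure
+ arithmetic builtin such as sqrtpd would typically be registered through
+ def_builtin_const, since TREE_READONLY tells the optimizer the call has no
+ side effects and its result may be subject to CSE:
+
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd",
+ v2df_ftype_v2df, IX86_BUILTIN_SQRTPD); */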
+
+/* Add any new builtin functions for a given ISA that may not have been
+ declared. This saves a bit of space compared to adding all of the
+ declarations to the tree up front, whether or not they end up being used. */
+
+static void
+ix86_add_new_builtins (int isa)
+{
+ int i;
+ tree decl;
+
+ for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
+ {
+ if ((ix86_builtins_isa[i].isa & isa) != 0
+ && ix86_builtins_isa[i].type != NULL_TREE)
+ {
+ decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
+ ix86_builtins_isa[i].type,
+ i, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ ix86_builtins[i] = decl;
+ ix86_builtins_isa[i].type = NULL_TREE;
+ if (ix86_builtins_isa[i].const_p)
+ TREE_READONLY (decl) = 1;
+ }
+ }
+}
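+
+/* Editorial note: this is the deferred half of the scheme described above
+ def_builtin. When function-specific target options enable a new ISA
+ (e.g. via __attribute__((target ("sse4.2")))), a call along the lines of
+
+ ix86_add_new_builtins (ix86_isa_flags);
+
+ declares, at extended scope, the builtins that were previously only
+ recorded in ix86_builtins_isa. */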
+
+/* Bits for builtin_description.flag. */
+
+/* Set when we don't support the comparison natively, and need to
+ swap the comparison operands in order to support it. */
+#define BUILTIN_DESC_SWAP_OPERANDS 1
+
+struct builtin_description
+{
+ const unsigned int mask;
+ const enum insn_code icode;
+ const char *const name;
+ const enum ix86_builtins code;
+ const enum rtx_code comparison;
+ const int flag;
+};
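+
+/* How to read the tables below (editorial gloss): e.g. the first bdesc_comi
+ entry,
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
+ IX86_BUILTIN_COMIEQSS, UNEQ, 0 }
+
+ says: when SSE is enabled, expand __builtin_ia32_comieq through the
+ sse_comi insn pattern using the UNEQ rtx comparison, with no extra flags. */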
+
+static const struct builtin_description bdesc_comi[] =
+{
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
+};
+
+static const struct builtin_description bdesc_pcmpestr[] =
+{
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+static const struct builtin_description bdesc_pcmpistr[] =
+{
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
+};
+
+/* Special builtin types */
+enum ix86_special_builtin_type
+{
+ SPECIAL_FTYPE_UNKNOWN,
+ VOID_FTYPE_VOID,
+ V32QI_FTYPE_PCCHAR,
+ V16QI_FTYPE_PCCHAR,
+ V8SF_FTYPE_PCV4SF,
+ V8SF_FTYPE_PCFLOAT,
+ V4DF_FTYPE_PCV2DF,
+ V4DF_FTYPE_PCDOUBLE,
+ V4SF_FTYPE_PCFLOAT,
+ V2DF_FTYPE_PCDOUBLE,
+ V8SF_FTYPE_PCV8SF_V8SF,
+ V4DF_FTYPE_PCV4DF_V4DF,
+ V4SF_FTYPE_V4SF_PCV2SF,
+ V4SF_FTYPE_PCV4SF_V4SF,
+ V2DF_FTYPE_V2DF_PCDOUBLE,
+ V2DF_FTYPE_PCV2DF_V2DF,
+ V2DI_FTYPE_PV2DI,
+ VOID_FTYPE_PV2SF_V4SF,
+ VOID_FTYPE_PV4DI_V4DI,
+ VOID_FTYPE_PV2DI_V2DI,
+ VOID_FTYPE_PCHAR_V32QI,
+ VOID_FTYPE_PCHAR_V16QI,
+ VOID_FTYPE_PFLOAT_V8SF,
+ VOID_FTYPE_PFLOAT_V4SF,
+ VOID_FTYPE_PDOUBLE_V4DF,
+ VOID_FTYPE_PDOUBLE_V2DF,
+ VOID_FTYPE_PDI_DI,
+ VOID_FTYPE_PINT_INT,
+ VOID_FTYPE_PV8SF_V8SF_V8SF,
+ VOID_FTYPE_PV4DF_V4DF_V4DF,
+ VOID_FTYPE_PV4SF_V4SF_V4SF,
+ VOID_FTYPE_PV2DF_V2DF_V2DF
+};
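+
+/* Editorial gloss on the naming convention: RET_FTYPE_ARG1_ARG2... encodes
+ the builtin's signature, with a P prefix meaning "pointer to" and PC
+ meaning "pointer to const". So V4SF_FTYPE_PCFLOAT is
+ "V4SF f (const float *)" and VOID_FTYPE_PV2DI_V2DI is
+ "void f (V2DI *, V2DI)". */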
+
+/* Builtin types */
+enum ix86_builtin_type
+{
+ FTYPE_UNKNOWN,
+ FLOAT128_FTYPE_FLOAT128,
+ FLOAT_FTYPE_FLOAT,
+ FLOAT128_FTYPE_FLOAT128_FLOAT128,
+ INT_FTYPE_V8SF_V8SF_PTEST,
+ INT_FTYPE_V4DI_V4DI_PTEST,
+ INT_FTYPE_V4DF_V4DF_PTEST,
+ INT_FTYPE_V4SF_V4SF_PTEST,
+ INT_FTYPE_V2DI_V2DI_PTEST,
+ INT_FTYPE_V2DF_V2DF_PTEST,
+ INT64_FTYPE_V4SF,
+ INT64_FTYPE_V2DF,
+ INT_FTYPE_V16QI,
+ INT_FTYPE_V8QI,
+ INT_FTYPE_V8SF,
+ INT_FTYPE_V4DF,
+ INT_FTYPE_V4SF,
+ INT_FTYPE_V2DF,
+ V16QI_FTYPE_V16QI,
+ V8SI_FTYPE_V8SF,
+ V8SI_FTYPE_V4SI,
+ V8HI_FTYPE_V8HI,
+ V8HI_FTYPE_V16QI,
+ V8QI_FTYPE_V8QI,
+ V8SF_FTYPE_V8SF,
+ V8SF_FTYPE_V8SI,
+ V8SF_FTYPE_V4SF,
+ V4SI_FTYPE_V4SI,
+ V4SI_FTYPE_V16QI,
+ V4SI_FTYPE_V8SI,
+ V4SI_FTYPE_V8HI,
+ V4SI_FTYPE_V4DF,
+ V4SI_FTYPE_V4SF,
+ V4SI_FTYPE_V2DF,
+ V4HI_FTYPE_V4HI,
+ V4DF_FTYPE_V4DF,
+ V4DF_FTYPE_V4SI,
+ V4DF_FTYPE_V4SF,
+ V4DF_FTYPE_V2DF,
+ V4SF_FTYPE_V4DF,
+ V4SF_FTYPE_V4SF,
+ V4SF_FTYPE_V4SF_VEC_MERGE,
+ V4SF_FTYPE_V8SF,
+ V4SF_FTYPE_V4SI,
+ V4SF_FTYPE_V2DF,
+ V2DI_FTYPE_V2DI,
+ V2DI_FTYPE_V16QI,
+ V2DI_FTYPE_V8HI,
+ V2DI_FTYPE_V4SI,
+ V2DF_FTYPE_V2DF,
+ V2DF_FTYPE_V2DF_VEC_MERGE,
+ V2DF_FTYPE_V4SI,
+ V2DF_FTYPE_V4DF,
+ V2DF_FTYPE_V4SF,
+ V2DF_FTYPE_V2SI,
+ V2SI_FTYPE_V2SI,
+ V2SI_FTYPE_V4SF,
+ V2SI_FTYPE_V2SF,
+ V2SI_FTYPE_V2DF,
+ V2SF_FTYPE_V2SF,
+ V2SF_FTYPE_V2SI,
+ V16QI_FTYPE_V16QI_V16QI,
+ V16QI_FTYPE_V8HI_V8HI,
+ V8QI_FTYPE_V8QI_V8QI,
+ V8QI_FTYPE_V4HI_V4HI,
+ V8HI_FTYPE_V8HI_V8HI,
+ V8HI_FTYPE_V8HI_V8HI_COUNT,
+ V8HI_FTYPE_V16QI_V16QI,
+ V8HI_FTYPE_V4SI_V4SI,
+ V8HI_FTYPE_V8HI_SI_COUNT,
+ V8SF_FTYPE_V8SF_V8SF,
+ V8SF_FTYPE_V8SF_V8SI,
+ V4SI_FTYPE_V4SI_V4SI,
+ V4SI_FTYPE_V4SI_V4SI_COUNT,
+ V4SI_FTYPE_V8HI_V8HI,
+ V4SI_FTYPE_V4SF_V4SF,
+ V4SI_FTYPE_V2DF_V2DF,
+ V4SI_FTYPE_V4SI_SI_COUNT,
+ V4HI_FTYPE_V4HI_V4HI,
+ V4HI_FTYPE_V4HI_V4HI_COUNT,
+ V4HI_FTYPE_V8QI_V8QI,
+ V4HI_FTYPE_V2SI_V2SI,
+ V4HI_FTYPE_V4HI_SI_COUNT,
+ V4DF_FTYPE_V4DF_V4DF,
+ V4DF_FTYPE_V4DF_V4DI,
+ V4SF_FTYPE_V4SF_V4SF,
+ V4SF_FTYPE_V4SF_V4SF_SWAP,
+ V4SF_FTYPE_V4SF_V4SI,
+ V4SF_FTYPE_V4SF_V2SI,
+ V4SF_FTYPE_V4SF_V2DF,
+ V4SF_FTYPE_V4SF_DI,
+ V4SF_FTYPE_V4SF_SI,
+ V2DI_FTYPE_V2DI_V2DI,
+ V2DI_FTYPE_V2DI_V2DI_COUNT,
+ V2DI_FTYPE_V16QI_V16QI,
+ V2DI_FTYPE_V4SI_V4SI,
+ V2DI_FTYPE_V2DI_V16QI,
+ V2DI_FTYPE_V2DF_V2DF,
+ V2DI_FTYPE_V2DI_SI_COUNT,
+ V2SI_FTYPE_V2SI_V2SI,
+ V2SI_FTYPE_V2SI_V2SI_COUNT,
+ V2SI_FTYPE_V4HI_V4HI,
+ V2SI_FTYPE_V2SF_V2SF,
+ V2SI_FTYPE_V2SI_SI_COUNT,
+ V2DF_FTYPE_V2DF_V2DF,
+ V2DF_FTYPE_V2DF_V2DF_SWAP,
+ V2DF_FTYPE_V2DF_V4SF,
+ V2DF_FTYPE_V2DF_V2DI,
+ V2DF_FTYPE_V2DF_DI,
+ V2DF_FTYPE_V2DF_SI,
+ V2SF_FTYPE_V2SF_V2SF,
+ V1DI_FTYPE_V1DI_V1DI,
+ V1DI_FTYPE_V1DI_V1DI_COUNT,
+ V1DI_FTYPE_V8QI_V8QI,
+ V1DI_FTYPE_V2SI_V2SI,
+ V1DI_FTYPE_V1DI_SI_COUNT,
+ UINT64_FTYPE_UINT64_UINT64,
+ UINT_FTYPE_UINT_UINT,
+ UINT_FTYPE_UINT_USHORT,
+ UINT_FTYPE_UINT_UCHAR,
+ V8HI_FTYPE_V8HI_INT,
+ V4SI_FTYPE_V4SI_INT,
+ V4HI_FTYPE_V4HI_INT,
+ V8SF_FTYPE_V8SF_INT,
+ V4SI_FTYPE_V8SI_INT,
+ V4SF_FTYPE_V8SF_INT,
+ V2DF_FTYPE_V4DF_INT,
+ V4DF_FTYPE_V4DF_INT,
+ V4SF_FTYPE_V4SF_INT,
+ V2DI_FTYPE_V2DI_INT,
+ V2DI2TI_FTYPE_V2DI_INT,
+ V2DF_FTYPE_V2DF_INT,
+ V16QI_FTYPE_V16QI_V16QI_V16QI,
+ V8SF_FTYPE_V8SF_V8SF_V8SF,
+ V4DF_FTYPE_V4DF_V4DF_V4DF,
+ V4SF_FTYPE_V4SF_V4SF_V4SF,
+ V2DF_FTYPE_V2DF_V2DF_V2DF,
+ V16QI_FTYPE_V16QI_V16QI_INT,
+ V8SI_FTYPE_V8SI_V8SI_INT,
+ V8SI_FTYPE_V8SI_V4SI_INT,
+ V8HI_FTYPE_V8HI_V8HI_INT,
+ V8SF_FTYPE_V8SF_V8SF_INT,
+ V8SF_FTYPE_V8SF_V4SF_INT,
+ V4SI_FTYPE_V4SI_V4SI_INT,
+ V4DF_FTYPE_V4DF_V4DF_INT,
+ V4DF_FTYPE_V4DF_V2DF_INT,
+ V4SF_FTYPE_V4SF_V4SF_INT,
+ V2DI_FTYPE_V2DI_V2DI_INT,
+ V2DI2TI_FTYPE_V2DI_V2DI_INT,
+ V1DI2DI_FTYPE_V1DI_V1DI_INT,
+ V2DF_FTYPE_V2DF_V2DF_INT,
+ V2DI_FTYPE_V2DI_UINT_UINT,
+ V2DI_FTYPE_V2DI_V2DI_UINT_UINT
+};
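+
+/* Editorial gloss on the suffix modifiers above: _COUNT marks the shift
+ forms whose last operand is a shift count, _SWAP marks comparisons expanded
+ with their operands swapped (cf. BUILTIN_DESC_SWAP_OPERANDS), _VEC_MERGE
+ marks scalar insns whose result is merged into the destination vector, and
+ _PTEST marks the ptest-style builtins that read the flags. */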
+
+/* Special builtins with variable number of arguments. */
+static const struct builtin_description bdesc_special_args[] =
+{
+ /* MMX */
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ /* 3DNow! */
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ /* SSE */
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
+
+ /* SSE or 3DNow!A */
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
+
+ /* SSE2 */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+
+ /* SSE3 */
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+
+ /* SSE4.1 */
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
+
+ /* SSE4A */
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+
+ /* AVX */
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
+ { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
+};
+
+/* Builtins with variable number of arguments. */
+static const struct builtin_description bdesc_args[] =
+{
+ /* MMX */
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
+
+ /* 3DNow! */
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
+
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ /* 3DNow!A */
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+ { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
+
+ /* SSE */
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+
+ /* SSE MMX or 3DNow!A */
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
+
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
+
+ /* SSE2 */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
+
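+ /* The whole-register byte shifts are expanded in TImode
+    (sse2_ashlti3 / sse2_lshrti3).  Note the count reaching these
+    pslldqi128/psrldqi128 builtins is in bits: emmintrin.h scales the
+    intrinsic's byte count by 8.  */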
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+
+ /* SSE2 MMX */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
+
+ /* SSE3 */
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+
+ /* SSSE3 */
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
+
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+
+ /* SSSE3. */
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
+
+ /* SSE4.1 */
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
+
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
+
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
+ /* SSE4.1 and SSE5 */
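+ /* OPTION_MASK_ISA_ROUND matches when either SSE4.1 or SSE5 is
+    enabled, since both ISAs provide the ROUND and PTEST forms.  */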
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+
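+ /* For PTEST the comparison code selects which flag is tested:
+    EQ -> ZF (ptestz), LTU -> CF (ptestc), GTU -> neither set
+    (ptestnzc).  */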
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+
+ /* SSE4A */
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
+ { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ /* AES */
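+ /* A zero name field means the builtin is not defined from this
+    table; these AES/PCLMUL rows only supply expansion information,
+    the builtins themselves being registered elsewhere (gated on
+    -maes / -mpclmul).  */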
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+
+ /* PCLMUL */
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
+
+ /* AVX */
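+ /* Most 256-bit arithmetic reuses the generic patterns (addv4df3,
+    xorv8sf3, ...) instantiated in V4DF/V8SF modes; only the
+    AVX-specific operations such as addsub and hadd/hsub need
+    avx_-prefixed insn codes.  */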
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
+};
+
+/* SSE5 */
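+/* Argument-layout tags for the SSE5 multi-operand builtins below.
+   Roughly: the leading digit is the operand count, the mode suffixes
+   give the element modes (two where source and destination differ),
+   and _IMM, _CMP and _TF mark immediate, compare and test variants.  */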
+enum multi_arg_type {
+ MULTI_ARG_UNKNOWN,
+ MULTI_ARG_3_SF,
+ MULTI_ARG_3_DF,
+ MULTI_ARG_3_DI,
+ MULTI_ARG_3_SI,
+ MULTI_ARG_3_SI_DI,
+ MULTI_ARG_3_HI,
+ MULTI_ARG_3_HI_SI,
+ MULTI_ARG_3_QI,
+ MULTI_ARG_3_PERMPS,
+ MULTI_ARG_3_PERMPD,
+ MULTI_ARG_2_SF,
+ MULTI_ARG_2_DF,
+ MULTI_ARG_2_DI,
+ MULTI_ARG_2_SI,
+ MULTI_ARG_2_HI,
+ MULTI_ARG_2_QI,
+ MULTI_ARG_2_DI_IMM,
+ MULTI_ARG_2_SI_IMM,
+ MULTI_ARG_2_HI_IMM,
+ MULTI_ARG_2_QI_IMM,
+ MULTI_ARG_2_SF_CMP,
+ MULTI_ARG_2_DF_CMP,
+ MULTI_ARG_2_DI_CMP,
+ MULTI_ARG_2_SI_CMP,
+ MULTI_ARG_2_HI_CMP,
+ MULTI_ARG_2_QI_CMP,
+ MULTI_ARG_2_DI_TF,
+ MULTI_ARG_2_SI_TF,
+ MULTI_ARG_2_HI_TF,
+ MULTI_ARG_2_QI_TF,
+ MULTI_ARG_2_SF_TF,
+ MULTI_ARG_2_DF_TF,
+ MULTI_ARG_1_SF,
+ MULTI_ARG_1_DF,
+ MULTI_ARG_1_DI,
+ MULTI_ARG_1_SI,
+ MULTI_ARG_1_HI,
+ MULTI_ARG_1_QI,
+ MULTI_ARG_1_SI_DI,
+ MULTI_ARG_1_HI_DI,
+ MULTI_ARG_1_HI_SI,
+ MULTI_ARG_1_QI_DI,
+ MULTI_ARG_1_QI_SI,
+ MULTI_ARG_1_QI_HI,
+ MULTI_ARG_1_PH2PS,
+ MULTI_ARG_1_PS2PH
+};
+
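+/* These entries go through the SSE5 multi-argument expander
+   (ix86_expand_multi_arg_builtin), with the last field selecting one
+   of the layouts above.  */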
+static const struct builtin_description bdesc_multi_arg[] =
+{
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
+
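+ /* SSE5 COM compares: the rtx-code column picks the condition (the
+    UNEQ/UNLT/UNLE/UNGT/UNGE/LTGT codes are the unordered forms), and
+    the "comneq"/"comuneq" spellings map to the same builtin codes as
+    "comne"/"comune".  */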
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+};
+
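+/* Each row of the table above bundles { ISA option mask, insn code,
+ builtin name, builtin enum, comparison/flag rtx code, MULTI_ARG_*
+ signature tag }.  Several spellings deliberately alias one enum: e.g.
+ __builtin_ia32_comneqss and __builtin_ia32_comness both map to
+ IX86_BUILTIN_COMNESS.  Schematically, a row is consumed at registration
+ time as (a sketch only; type_for stands in for the real tag-to-type
+ dispatch, which switches on d->flag):
+
+	type = type_for (d->flag);
+	def_builtin_const (d->mask, d->name, type, d->code);  */
+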
+/* Set up all the MMX/SSE builtins, including builtins for instructions that
+ are not in the current target ISA, so that particular modules can be
+ compiled with target-specific options that differ from the command-line
+ options.  */
+static void
+ix86_init_mmx_sse_builtins (void)
+{
+ const struct builtin_description * d;
+ size_t i;
+
+ tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
+ tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V1DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
+ tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
+ tree V2DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
+ tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
+ tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
+ tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
+ tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+ tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
+ tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
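+
+ /* Each node above is the tree type for one machine vector mode; e.g.
+ V16QI_type_node is a 16-element vector of chars, the type behind the
+ __v16qi typedef in the intrinsic headers.  */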
+
+ tree pchar_type_node = build_pointer_type (char_type_node);
+ tree pcchar_type_node
+ = build_pointer_type (build_type_variant (char_type_node, 1, 0));
+ tree pfloat_type_node = build_pointer_type (float_type_node);
+ tree pcfloat_type_node
+ = build_pointer_type (build_type_variant (float_type_node, 1, 0));
+ tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
+ tree pcv2sf_type_node
+ = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
+ tree pv2di_type_node = build_pointer_type (V2DI_type_node);
+ tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
+
+ /* Comparisons. */
+ tree int_ftype_v4sf_v4sf
+ = build_function_type_list (integer_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v4si_ftype_v4sf_v4sf
+ = build_function_type_list (V4SI_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
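+ /* The locals below follow a <return>_ftype_<operands> naming scheme:
+ v4si_ftype_v4sf_v4sf above, for instance, is the type of a function
+ taking two V4SF vectors and returning a V4SI vector.  */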
+ /* MMX/SSE/integer conversions. */
+ tree int_ftype_v4sf
+ = build_function_type_list (integer_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int64_ftype_v4sf
+ = build_function_type_list (long_long_integer_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree int_ftype_v8qi
+ = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, integer_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_int64
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, long_long_integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v2si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V2SI_type_node, NULL_TREE);
+
+ /* Miscellaneous. */
+ tree v8qi_ftype_v4hi_v4hi
+ = build_function_type_list (V8QI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v4hi_ftype_v2si_v2si
+ = build_function_type_list (V4HI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_int
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ tree v2si_ftype_v4hi_v4hi
+ = build_function_type_list (V2SI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_int
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, integer_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_int
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, integer_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, integer_type_node, NULL_TREE);
+
+ tree void_ftype_void
+ = build_function_type (void_type_node, void_list_node);
+ tree void_ftype_unsigned
+ = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
+ tree void_ftype_unsigned_unsigned
+ = build_function_type_list (void_type_node, unsigned_type_node,
+ unsigned_type_node, NULL_TREE);
+ tree void_ftype_pcvoid_unsigned_unsigned
+ = build_function_type_list (void_type_node, const_ptr_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_void
+ = build_function_type (unsigned_type_node, void_list_node);
+ tree v2si_ftype_v4sf
+ = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
+ /* Loads/stores. */
+ tree void_ftype_v8qi_v8qi_pchar
+ = build_function_type_list (void_type_node,
+ V8QI_type_node, V8QI_type_node,
+ pchar_type_node, NULL_TREE);
+ tree v4sf_ftype_pcfloat
+ = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_pcv2sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, pcv2sf_type_node, NULL_TREE);
+ tree void_ftype_pv2sf_v4sf
+ = build_function_type_list (void_type_node,
+ pv2sf_type_node, V4SF_type_node, NULL_TREE);
+ tree void_ftype_pfloat_v4sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V4SF_type_node, NULL_TREE);
+ tree void_ftype_pdi_di
+ = build_function_type_list (void_type_node,
+ pdi_type_node, long_long_unsigned_type_node,
+ NULL_TREE);
+ tree void_ftype_pv2di_v2di
+ = build_function_type_list (void_type_node,
+ pv2di_type_node, V2DI_type_node, NULL_TREE);
+ /* Normal vector unops. */
+ tree v4sf_ftype_v4sf
+ = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v16qi_ftype_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi
+ = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si
+ = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi
+ = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi
+ = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
+
+ /* Normal vector binops. */
+ tree v4sf_ftype_v4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node, NULL_TREE);
+ tree v8qi_ftype_v8qi_v8qi
+ = build_function_type_list (V8QI_type_node,
+ V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v4hi_ftype_v4hi_v4hi
+ = build_function_type_list (V4HI_type_node,
+ V4HI_type_node, V4HI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si_v2si
+ = build_function_type_list (V2SI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_v1di
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node,
+ integer_type_node, NULL_TREE);
+ tree v2si_ftype_v2sf
+ = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
+ tree v2sf_ftype_v2si
+ = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
+ tree v2si_ftype_v2si
+ = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v2sf_ftype_v2sf
+ = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree v2sf_ftype_v2sf_v2sf
+ = build_function_type_list (V2SF_type_node,
+ V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree v2si_ftype_v2sf_v2sf
+ = build_function_type_list (V2SI_type_node,
+ V2SF_type_node, V2SF_type_node, NULL_TREE);
+ tree pint_type_node = build_pointer_type (integer_type_node);
+ tree pdouble_type_node = build_pointer_type (double_type_node);
+ tree pcdouble_type_node = build_pointer_type (
+ build_type_variant (double_type_node, 1, 0));
+ tree int_ftype_v2df_v2df
+ = build_function_type_list (integer_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+
+ tree void_ftype_pcvoid
+ = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
+ tree v4sf_ftype_v4si
+ = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v4si_ftype_v4sf
+ = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v4si
+ = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
+ tree v4si_ftype_v2df
+ = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4si_ftype_v2df_v2df
+ = build_function_type_list (V4SI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2si_ftype_v2df
+ = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4sf_ftype_v2df
+ = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2si
+ = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
+ tree v2df_ftype_v4sf
+ = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
+ tree int_ftype_v2df
+ = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+ tree int64_ftype_v2df
+ = build_function_type_list (long_long_integer_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, integer_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_int64
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, long_long_integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v2df
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v4sf
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V4SF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df_int
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_pcdouble
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, pcdouble_type_node, NULL_TREE);
+ tree void_ftype_pdouble_v2df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V2DF_type_node, NULL_TREE);
+ tree void_ftype_pint_int
+ = build_function_type_list (void_type_node,
+ pint_type_node, integer_type_node, NULL_TREE);
+ tree void_ftype_v16qi_v16qi_pchar
+ = build_function_type_list (void_type_node,
+ V16QI_type_node, V16QI_type_node,
+ pchar_type_node, NULL_TREE);
+ tree v2df_ftype_pcdouble
+ = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v4si_ftype_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node, V2DI_type_node, NULL_TREE);
+ tree v2di_ftype_v2df_v2df
+ = build_function_type_list (V2DI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2df_ftype_v2df
+ = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_int
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
+ tree v2di_ftype_v2di_v2di_int
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ V2DI_type_node, integer_type_node, NULL_TREE);
+ tree v4si_ftype_v4si_int
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node, integer_type_node, NULL_TREE);
+ tree v8hi_ftype_v8hi_int
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node, integer_type_node, NULL_TREE);
+ tree v4si_ftype_v8hi_v8hi
+ = build_function_type_list (V4SI_type_node,
+ V8HI_type_node, V8HI_type_node, NULL_TREE);
+ tree v1di_ftype_v8qi_v8qi
+ = build_function_type_list (V1DI_type_node,
+ V8QI_type_node, V8QI_type_node, NULL_TREE);
+ tree v1di_ftype_v2si_v2si
+ = build_function_type_list (V1DI_type_node,
+ V2SI_type_node, V2SI_type_node, NULL_TREE);
+ tree v2di_ftype_v16qi_v16qi
+ = build_function_type_list (V2DI_type_node,
+ V16QI_type_node, V16QI_type_node, NULL_TREE);
+ tree v2di_ftype_v4si_v4si
+ = build_function_type_list (V2DI_type_node,
+ V4SI_type_node, V4SI_type_node, NULL_TREE);
+ tree int_ftype_v16qi
+ = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
+ tree v16qi_ftype_pcchar
+ = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
+ tree void_ftype_pchar_v16qi
+ = build_function_type_list (void_type_node,
+ pchar_type_node, V16QI_type_node, NULL_TREE);
+
+ tree v2di_ftype_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v2di_v2di_unsigned_unsigned
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
+ unsigned_type_node, unsigned_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v2di_v16qi
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v2df_v2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DF_type_node,
+ V2DF_type_node, NULL_TREE);
+ tree v4sf_ftype_v4sf_v4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SF_type_node,
+ V4SF_type_node, NULL_TREE);
+ tree v8hi_ftype_v16qi
+ = build_function_type_list (V8HI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v16qi
+ = build_function_type_list (V4SI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v16qi
+ = build_function_type_list (V2DI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8hi
+ = build_function_type_list (V4SI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v8hi
+ = build_function_type_list (V2DI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_v4si
+ = build_function_type_list (V2DI_type_node, V4SI_type_node,
+ NULL_TREE);
+ tree v2di_ftype_pv2di
+ = build_function_type_list (V2DI_type_node, pv2di_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi_int
+ = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ V16QI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_v16qi_v16qi
+ = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ V16QI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v8hi_v8hi_int
+ = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ V8HI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v4si_v4si_int
+ = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ V4SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v2di_v2di
+ = build_function_type_list (integer_type_node,
+ V2DI_type_node, V2DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v16qi_ftype_v16qi_int_v16qi_int_int
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree int_ftype_v16qi_v16qi_int
+ = build_function_type_list (integer_type_node,
+ V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ /* SSE5 instructions.  */
+ tree v2di_ftype_v2di_v2di_v2di
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
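+
+ /* The three-operand signatures in this block back the SSE5 builtins
+ that take a third full-width source, such as the fused multiply-add
+ and conditional-move/permute forms.  */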
+
+ tree v4si_ftype_v4si_v4si_v4si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_v4si_v2di
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ V4SI_type_node,
+ V2DI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v8hi
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_v8hi_v4si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ V8HI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+
+ tree v2df_ftype_v2df_v2df_v16qi
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node,
+ V2DF_type_node,
+ V16QI_type_node,
+ NULL_TREE);
+
+ tree v4sf_ftype_v4sf_v4sf_v16qi
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node,
+ V4SF_type_node,
+ V16QI_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di_si
+ = build_function_type_list (V2DI_type_node,
+ V2DI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v4si_ftype_v4si_si
+ = build_function_type_list (V4SI_type_node,
+ V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v8hi_ftype_v8hi_si
+ = build_function_type_list (V8HI_type_node,
+ V8HI_type_node,
+ integer_type_node,
+ NULL_TREE);
+
+ tree v16qi_ftype_v16qi_si
+ = build_function_type_list (V16QI_type_node,
+ V16QI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4hi
+ = build_function_type_list (V4SF_type_node,
+ V4HI_type_node,
+ NULL_TREE);
+
+ tree v4hi_ftype_v4sf
+ = build_function_type_list (V4HI_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
+
+ tree v16qi_ftype_v8hi_v8hi
+ = build_function_type_list (V16QI_type_node,
+ V8HI_type_node, V8HI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v4si_v4si
+ = build_function_type_list (V8HI_type_node,
+ V4SI_type_node, V4SI_type_node,
+ NULL_TREE);
+ tree v8hi_ftype_v16qi_v16qi
+ = build_function_type_list (V8HI_type_node,
+ V16QI_type_node, V16QI_type_node,
+ NULL_TREE);
+ tree v4hi_ftype_v8qi_v8qi
+ = build_function_type_list (V4HI_type_node,
+ V8QI_type_node, V8QI_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_uchar
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_char_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_ushort
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ short_unsigned_type_node,
+ NULL_TREE);
+ tree unsigned_ftype_unsigned_unsigned
+ = build_function_type_list (unsigned_type_node,
+ unsigned_type_node,
+ unsigned_type_node,
+ NULL_TREE);
+ tree uint64_ftype_uint64_uint64
+ = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ NULL_TREE);
+ tree float_ftype_float
+ = build_function_type_list (float_type_node,
+ float_type_node,
+ NULL_TREE);
+
+ /* AVX builtins.  */
+ tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
+ V32QImode);
+ tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
+ V8SImode);
+ tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
+ V8SFmode);
+ tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
+ V4DImode);
+ tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
+ V4DFmode);
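+
+ /* These are the 256-bit counterparts of the 128-bit vector type nodes
+ built at the top of the function; the AVX function types below are
+ assembled from them in the same <return>_ftype_<operands> fashion.  */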
+ tree v8sf_ftype_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8sf
+ = build_function_type_list (V8SI_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v4df
+ = build_function_type_list (V4SI_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4si
+ = build_function_type_list (V4DF_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4sf
+ = build_function_type_list (V4DF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4df
+ = build_function_type_list (V4SF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8si_int
+ = build_function_type_list (V4SI_type_node,
+ V8SI_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v8sf_int
+ = build_function_type_list (V4SF_type_node,
+ V8SF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v4df_int
+ = build_function_type_list (V2DF_type_node,
+ V4DF_type_node, integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8si_v8si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V8SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_pcfloat
+ = build_function_type_list (V8SF_type_node,
+ pcfloat_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcdouble
+ = build_function_type_list (V4DF_type_node,
+ pcdouble_type_node,
+ NULL_TREE);
+ tree pcv4sf_type_node
+ = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
+ tree pcv2df_type_node
+ = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
+ tree v8sf_ftype_pcv4sf
+ = build_function_type_list (V8SF_type_node,
+ pcv4sf_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcv2df
+ = build_function_type_list (V4DF_type_node,
+ pcv2df_type_node,
+ NULL_TREE);
+ tree v32qi_ftype_pcchar
+ = build_function_type_list (V32QI_type_node,
+ pcchar_type_node,
+ NULL_TREE);
+ tree void_ftype_pchar_v32qi
+ = build_function_type_list (void_type_node,
+ pchar_type_node, V32QI_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v8si_v4si_int
+ = build_function_type_list (V8SI_type_node,
+ V8SI_type_node, V4SI_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree pv4di_type_node = build_pointer_type (V4DI_type_node);
+ tree void_ftype_pv4di_v4di
+ = build_function_type_list (void_type_node,
+ pv4di_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v4sf_int
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V4SF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v2df_int
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V2DF_type_node,
+ integer_type_node,
+ NULL_TREE);
+ tree void_ftype_pfloat_v8sf
+ = build_function_type_list (void_type_node,
+ pfloat_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pdouble_v4df
+ = build_function_type_list (void_type_node,
+ pdouble_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
+ tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
+ tree pv4df_type_node = build_pointer_type (V4DF_type_node);
+ tree pv2df_type_node = build_pointer_type (V2DF_type_node);
+ tree pcv8sf_type_node
+ = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
+ tree pcv4df_type_node
+ = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
+ tree v8sf_ftype_pcv8sf_v8sf
+ = build_function_type_list (V8SF_type_node,
+ pcv8sf_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_pcv4df_v4df
+ = build_function_type_list (V4DF_type_node,
+ pcv4df_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_pcv4sf_v4sf
+ = build_function_type_list (V4SF_type_node,
+ pcv4sf_type_node, V4SF_type_node,
+ NULL_TREE);
+ tree v2df_ftype_pcv2df_v2df
+ = build_function_type_list (V2DF_type_node,
+ pcv2df_type_node, V2DF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv8sf_v8sf_v8sf
+ = build_function_type_list (void_type_node,
+ pv8sf_type_node, V8SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv4df_v4df_v4df
+ = build_function_type_list (void_type_node,
+ pv4df_type_node, V4DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv4sf_v4sf_v4sf
+ = build_function_type_list (void_type_node,
+ pv4sf_type_node, V4SF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree void_ftype_pv2df_v2df_v2df
+ = build_function_type_list (void_type_node,
+ pv2df_type_node, V2DF_type_node,
+ V2DF_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v2df
+ = build_function_type_list (V4DF_type_node,
+ V2DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v4sf
+ = build_function_type_list (V8SF_type_node,
+ V4SF_type_node,
+ NULL_TREE);
+ tree v8si_ftype_v4si
+ = build_function_type_list (V8SI_type_node,
+ V4SI_type_node,
+ NULL_TREE);
+ tree v2df_ftype_v4df
+ = build_function_type_list (V2DF_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v8sf
+ = build_function_type_list (V4SF_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree v4si_ftype_v8si
+ = build_function_type_list (V4SI_type_node,
+ V8SI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v8sf_v8sf
+ = build_function_type_list (integer_type_node,
+ V8SF_type_node, V8SF_type_node,
+ NULL_TREE);
+ tree int_ftype_v4di_v4di
+ = build_function_type_list (integer_type_node,
+ V4DI_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree int_ftype_v4df_v4df
+ = build_function_type_list (integer_type_node,
+ V4DF_type_node, V4DF_type_node,
+ NULL_TREE);
+ tree v8sf_ftype_v8sf_v8si
+ = build_function_type_list (V8SF_type_node,
+ V8SF_type_node, V8SI_type_node,
+ NULL_TREE);
+ tree v4df_ftype_v4df_v4di
+ = build_function_type_list (V4DF_type_node,
+ V4DF_type_node, V4DI_type_node,
+ NULL_TREE);
+ tree v4sf_ftype_v4sf_v4si
+ = build_function_type_list (V4SF_type_node,
+ V4SF_type_node, V4SI_type_node, NULL_TREE);
+ tree v2df_ftype_v2df_v2di
+ = build_function_type_list (V2DF_type_node,
+ V2DF_type_node, V2DI_type_node, NULL_TREE);
+
+ tree ftype;
+
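+ /* The loops that follow do the actual registration: each descriptor's
+ FLAG tag is mapped through a switch to one of the type nodes built
+ above, and the result is handed to def_builtin or def_builtin_const
+ together with the descriptor's ISA mask, name and code.  Builtins are
+ created even for ISAs that are disabled on the command line (see the
+ comment at the top of this function), so per-module target options can
+ still reach them.  */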
+ /* Add all special builtins with a variable number of operands.  */
+ for (i = 0, d = bdesc_special_args;
+ i < ARRAY_SIZE (bdesc_special_args);
+ i++, d++)
+ {
+ tree type;
+
+ if (d->name == 0)
+ continue;
+
+ switch ((enum ix86_special_builtin_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ type = void_ftype_void;
+ break;
+ case V32QI_FTYPE_PCCHAR:
+ type = v32qi_ftype_pcchar;
+ break;
+ case V16QI_FTYPE_PCCHAR:
+ type = v16qi_ftype_pcchar;
+ break;
+ case V8SF_FTYPE_PCV4SF:
+ type = v8sf_ftype_pcv4sf;
+ break;
+ case V8SF_FTYPE_PCFLOAT:
+ type = v8sf_ftype_pcfloat;
+ break;
+ case V4DF_FTYPE_PCV2DF:
+ type = v4df_ftype_pcv2df;
+ break;
+ case V4DF_FTYPE_PCDOUBLE:
+ type = v4df_ftype_pcdouble;
+ break;
+ case V4SF_FTYPE_PCFLOAT:
+ type = v4sf_ftype_pcfloat;
+ break;
+ case V2DI_FTYPE_PV2DI:
+ type = v2di_ftype_pv2di;
+ break;
+ case V2DF_FTYPE_PCDOUBLE:
+ type = v2df_ftype_pcdouble;
+ break;
+ case V8SF_FTYPE_PCV8SF_V8SF:
+ type = v8sf_ftype_pcv8sf_v8sf;
+ break;
+ case V4DF_FTYPE_PCV4DF_V4DF:
+ type = v4df_ftype_pcv4df_v4df;
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ type = v4sf_ftype_v4sf_pcv2sf;
+ break;
+ case V4SF_FTYPE_PCV4SF_V4SF:
+ type = v4sf_ftype_pcv4sf_v4sf;
+ break;
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ type = v2df_ftype_v2df_pcdouble;
+ break;
+ case V2DF_FTYPE_PCV2DF_V2DF:
+ type = v2df_ftype_pcv2df_v2df;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ type = void_ftype_pv2sf_v4sf;
+ break;
+ case VOID_FTYPE_PV4DI_V4DI:
+ type = void_ftype_pv4di_v4di;
+ break;
+ case VOID_FTYPE_PV2DI_V2DI:
+ type = void_ftype_pv2di_v2di;
+ break;
+ case VOID_FTYPE_PCHAR_V32QI:
+ type = void_ftype_pchar_v32qi;
+ break;
+ case VOID_FTYPE_PCHAR_V16QI:
+ type = void_ftype_pchar_v16qi;
+ break;
+ case VOID_FTYPE_PFLOAT_V8SF:
+ type = void_ftype_pfloat_v8sf;
+ break;
+ case VOID_FTYPE_PFLOAT_V4SF:
+ type = void_ftype_pfloat_v4sf;
+ break;
+ case VOID_FTYPE_PDOUBLE_V4DF:
+ type = void_ftype_pdouble_v4df;
+ break;
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ type = void_ftype_pdouble_v2df;
+ break;
+ case VOID_FTYPE_PDI_DI:
+ type = void_ftype_pdi_di;
+ break;
+ case VOID_FTYPE_PINT_INT:
+ type = void_ftype_pint_int;
+ break;
+ case VOID_FTYPE_PV8SF_V8SF_V8SF:
+ type = void_ftype_pv8sf_v8sf_v8sf;
+ break;
+ case VOID_FTYPE_PV4DF_V4DF_V4DF:
+ type = void_ftype_pv4df_v4df_v4df;
+ break;
+ case VOID_FTYPE_PV4SF_V4SF_V4SF:
+ type = void_ftype_pv4sf_v4sf_v4sf;
+ break;
+ case VOID_FTYPE_PV2DF_V2DF_V2DF:
+ type = void_ftype_pv2df_v2df_v2df;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->mask, d->name, type, d->code);
+ }
+
+ /* Add all builtins with a variable number of operands.  */
+ for (i = 0, d = bdesc_args;
+ i < ARRAY_SIZE (bdesc_args);
+ i++, d++)
+ {
+ tree type;
+
+ if (d->name == 0)
+ continue;
+
+ switch ((enum ix86_builtin_type) d->flag)
+ {
+ case FLOAT_FTYPE_FLOAT:
+ type = float_ftype_float;
+ break;
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ type = int_ftype_v8sf_v8sf;
+ break;
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ type = int_ftype_v4di_v4di;
+ break;
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ type = int_ftype_v4df_v4df;
+ break;
+ case INT_FTYPE_V4SF_V4SF_PTEST:
+ type = int_ftype_v4sf_v4sf;
+ break;
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ type = int_ftype_v2di_v2di;
+ break;
+ case INT_FTYPE_V2DF_V2DF_PTEST:
+ type = int_ftype_v2df_v2df;
+ break;
+ case INT64_FTYPE_V4SF:
+ type = int64_ftype_v4sf;
+ break;
+ case INT64_FTYPE_V2DF:
+ type = int64_ftype_v2df;
+ break;
+ case INT_FTYPE_V16QI:
+ type = int_ftype_v16qi;
+ break;
+ case INT_FTYPE_V8QI:
+ type = int_ftype_v8qi;
+ break;
+ case INT_FTYPE_V8SF:
+ type = int_ftype_v8sf;
+ break;
+ case INT_FTYPE_V4DF:
+ type = int_ftype_v4df;
+ break;
+ case INT_FTYPE_V4SF:
+ type = int_ftype_v4sf;
+ break;
+ case INT_FTYPE_V2DF:
+ type = int_ftype_v2df;
+ break;
+ case V16QI_FTYPE_V16QI:
+ type = v16qi_ftype_v16qi;
+ break;
+ case V8SI_FTYPE_V8SF:
+ type = v8si_ftype_v8sf;
+ break;
+ case V8SI_FTYPE_V4SI:
+ type = v8si_ftype_v4si;
+ break;
+ case V8HI_FTYPE_V8HI:
+ type = v8hi_ftype_v8hi;
+ break;
+ case V8HI_FTYPE_V16QI:
+ type = v8hi_ftype_v16qi;
+ break;
+ case V8QI_FTYPE_V8QI:
+ type = v8qi_ftype_v8qi;
+ break;
+ case V8SF_FTYPE_V8SF:
+ type = v8sf_ftype_v8sf;
+ break;
+ case V8SF_FTYPE_V8SI:
+ type = v8sf_ftype_v8si;
+ break;
+ case V8SF_FTYPE_V4SF:
+ type = v8sf_ftype_v4sf;
+ break;
+ case V4SI_FTYPE_V4DF:
+ type = v4si_ftype_v4df;
+ break;
+ case V4SI_FTYPE_V4SI:
+ type = v4si_ftype_v4si;
+ break;
+ case V4SI_FTYPE_V16QI:
+ type = v4si_ftype_v16qi;
+ break;
+ case V4SI_FTYPE_V8SI:
+ type = v4si_ftype_v8si;
+ break;
+ case V4SI_FTYPE_V8HI:
+ type = v4si_ftype_v8hi;
+ break;
+ case V4SI_FTYPE_V4SF:
+ type = v4si_ftype_v4sf;
+ break;
+ case V4SI_FTYPE_V2DF:
+ type = v4si_ftype_v2df;
+ break;
+ case V4HI_FTYPE_V4HI:
+ type = v4hi_ftype_v4hi;
+ break;
+ case V4DF_FTYPE_V4DF:
+ type = v4df_ftype_v4df;
+ break;
+ case V4DF_FTYPE_V4SI:
+ type = v4df_ftype_v4si;
+ break;
+ case V4DF_FTYPE_V4SF:
+ type = v4df_ftype_v4sf;
+ break;
+ case V4DF_FTYPE_V2DF:
+ type = v4df_ftype_v2df;
+ break;
+ case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
+ type = v4sf_ftype_v4sf;
+ break;
+ case V4SF_FTYPE_V8SF:
+ type = v4sf_ftype_v8sf;
+ break;
+ case V4SF_FTYPE_V4SI:
+ type = v4sf_ftype_v4si;
+ break;
+ case V4SF_FTYPE_V4DF:
+ type = v4sf_ftype_v4df;
+ break;
+ case V4SF_FTYPE_V2DF:
+ type = v4sf_ftype_v2df;
+ break;
+ case V2DI_FTYPE_V2DI:
+ type = v2di_ftype_v2di;
+ break;
+ case V2DI_FTYPE_V16QI:
+ type = v2di_ftype_v16qi;
+ break;
+ case V2DI_FTYPE_V8HI:
+ type = v2di_ftype_v8hi;
+ break;
+ case V2DI_FTYPE_V4SI:
+ type = v2di_ftype_v4si;
+ break;
+ case V2SI_FTYPE_V2SI:
+ type = v2si_ftype_v2si;
+ break;
+ case V2SI_FTYPE_V4SF:
+ type = v2si_ftype_v4sf;
+ break;
+ case V2SI_FTYPE_V2DF:
+ type = v2si_ftype_v2df;
+ break;
+ case V2SI_FTYPE_V2SF:
+ type = v2si_ftype_v2sf;
+ break;
+ case V2DF_FTYPE_V4DF:
+ type = v2df_ftype_v4df;
+ break;
+ case V2DF_FTYPE_V4SF:
+ type = v2df_ftype_v4sf;
+ break;
+ case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
+ type = v2df_ftype_v2df;
+ break;
+ case V2DF_FTYPE_V2SI:
+ type = v2df_ftype_v2si;
+ break;
+ case V2DF_FTYPE_V4SI:
+ type = v2df_ftype_v4si;
+ break;
+ case V2SF_FTYPE_V2SF:
+ type = v2sf_ftype_v2sf;
+ break;
+ case V2SF_FTYPE_V2SI:
+ type = v2sf_ftype_v2si;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI:
+ type = v16qi_ftype_v16qi_v16qi;
+ break;
+ case V16QI_FTYPE_V8HI_V8HI:
+ type = v16qi_ftype_v8hi_v8hi;
+ break;
+ case V8QI_FTYPE_V8QI_V8QI:
+ type = v8qi_ftype_v8qi_v8qi;
+ break;
+ case V8QI_FTYPE_V4HI_V4HI:
+ type = v8qi_ftype_v4hi_v4hi;
+ break;
+ case V8HI_FTYPE_V8HI_V8HI:
+ case V8HI_FTYPE_V8HI_V8HI_COUNT:
+ type = v8hi_ftype_v8hi_v8hi;
+ break;
+ case V8HI_FTYPE_V16QI_V16QI:
+ type = v8hi_ftype_v16qi_v16qi;
+ break;
+ case V8HI_FTYPE_V4SI_V4SI:
+ type = v8hi_ftype_v4si_v4si;
+ break;
+ case V8HI_FTYPE_V8HI_SI_COUNT:
+ type = v8hi_ftype_v8hi_int;
+ break;
+ case V8SF_FTYPE_V8SF_V8SF:
+ type = v8sf_ftype_v8sf_v8sf;
+ break;
+ case V8SF_FTYPE_V8SF_V8SI:
+ type = v8sf_ftype_v8sf_v8si;
+ break;
+ case V4SI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V4SI_V4SI_COUNT:
+ type = v4si_ftype_v4si_v4si;
+ break;
+ case V4SI_FTYPE_V8HI_V8HI:
+ type = v4si_ftype_v8hi_v8hi;
+ break;
+ case V4SI_FTYPE_V4SF_V4SF:
+ type = v4si_ftype_v4sf_v4sf;
+ break;
+ case V4SI_FTYPE_V2DF_V2DF:
+ type = v4si_ftype_v2df_v2df;
+ break;
+ case V4SI_FTYPE_V4SI_SI_COUNT:
+ type = v4si_ftype_v4si_int;
+ break;
+ case V4HI_FTYPE_V4HI_V4HI:
+ case V4HI_FTYPE_V4HI_V4HI_COUNT:
+ type = v4hi_ftype_v4hi_v4hi;
+ break;
+ case V4HI_FTYPE_V8QI_V8QI:
+ type = v4hi_ftype_v8qi_v8qi;
+ break;
+ case V4HI_FTYPE_V2SI_V2SI:
+ type = v4hi_ftype_v2si_v2si;
+ break;
+ case V4HI_FTYPE_V4HI_SI_COUNT:
+ type = v4hi_ftype_v4hi_int;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF:
+ type = v4df_ftype_v4df_v4df;
+ break;
+ case V4DF_FTYPE_V4DF_V4DI:
+ type = v4df_ftype_v4df_v4di;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V4SF_SWAP:
+ type = v4sf_ftype_v4sf_v4sf;
+ break;
+ case V4SF_FTYPE_V4SF_V4SI:
+ type = v4sf_ftype_v4sf_v4si;
+ break;
+ case V4SF_FTYPE_V4SF_V2SI:
+ type = v4sf_ftype_v4sf_v2si;
+ break;
+ case V4SF_FTYPE_V4SF_V2DF:
+ type = v4sf_ftype_v4sf_v2df;
+ break;
+ case V4SF_FTYPE_V4SF_DI:
+ type = v4sf_ftype_v4sf_int64;
+ break;
+ case V4SF_FTYPE_V4SF_SI:
+ type = v4sf_ftype_v4sf_int;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI:
+ case V2DI_FTYPE_V2DI_V2DI_COUNT:
+ type = v2di_ftype_v2di_v2di;
+ break;
+ case V2DI_FTYPE_V16QI_V16QI:
+ type = v2di_ftype_v16qi_v16qi;
+ break;
+ case V2DI_FTYPE_V4SI_V4SI:
+ type = v2di_ftype_v4si_v4si;
+ break;
+ case V2DI_FTYPE_V2DI_V16QI:
+ type = v2di_ftype_v2di_v16qi;
+ break;
+ case V2DI_FTYPE_V2DF_V2DF:
+ type = v2di_ftype_v2df_v2df;
+ break;
+ case V2DI_FTYPE_V2DI_SI_COUNT:
+ type = v2di_ftype_v2di_int;
+ break;
+ case V2SI_FTYPE_V2SI_V2SI:
+ case V2SI_FTYPE_V2SI_V2SI_COUNT:
+ type = v2si_ftype_v2si_v2si;
+ break;
+ case V2SI_FTYPE_V4HI_V4HI:
+ type = v2si_ftype_v4hi_v4hi;
+ break;
+ case V2SI_FTYPE_V2SF_V2SF:
+ type = v2si_ftype_v2sf_v2sf;
+ break;
+ case V2SI_FTYPE_V2SI_SI_COUNT:
+ type = v2si_ftype_v2si_int;
+ break;
+ case V2DF_FTYPE_V2DF_V2DF:
+ case V2DF_FTYPE_V2DF_V2DF_SWAP:
+ type = v2df_ftype_v2df_v2df;
+ break;
+ case V2DF_FTYPE_V2DF_V4SF:
+ type = v2df_ftype_v2df_v4sf;
+ break;
+ case V2DF_FTYPE_V2DF_V2DI:
+ type = v2df_ftype_v2df_v2di;
+ break;
+ case V2DF_FTYPE_V2DF_DI:
+ type = v2df_ftype_v2df_int64;
+ break;
+ case V2DF_FTYPE_V2DF_SI:
+ type = v2df_ftype_v2df_int;
+ break;
+ case V2SF_FTYPE_V2SF_V2SF:
+ type = v2sf_ftype_v2sf_v2sf;
+ break;
+ case V1DI_FTYPE_V1DI_V1DI:
+ case V1DI_FTYPE_V1DI_V1DI_COUNT:
+ type = v1di_ftype_v1di_v1di;
+ break;
+ case V1DI_FTYPE_V8QI_V8QI:
+ type = v1di_ftype_v8qi_v8qi;
+ break;
+ case V1DI_FTYPE_V2SI_V2SI:
+ type = v1di_ftype_v2si_v2si;
+ break;
+ case V1DI_FTYPE_V1DI_SI_COUNT:
+ type = v1di_ftype_v1di_int;
+ break;
+ case UINT64_FTYPE_UINT64_UINT64:
+ type = uint64_ftype_uint64_uint64;
+ break;
+ case UINT_FTYPE_UINT_UINT:
+ type = unsigned_ftype_unsigned_unsigned;
+ break;
+ case UINT_FTYPE_UINT_USHORT:
+ type = unsigned_ftype_unsigned_ushort;
+ break;
+ case UINT_FTYPE_UINT_UCHAR:
+ type = unsigned_ftype_unsigned_uchar;
+ break;
+ case V8HI_FTYPE_V8HI_INT:
+ type = v8hi_ftype_v8hi_int;
+ break;
+ case V8SF_FTYPE_V8SF_INT:
+ type = v8sf_ftype_v8sf_int;
+ break;
+ case V4SI_FTYPE_V4SI_INT:
+ type = v4si_ftype_v4si_int;
+ break;
+ case V4SI_FTYPE_V8SI_INT:
+ type = v4si_ftype_v8si_int;
+ break;
+ case V4HI_FTYPE_V4HI_INT:
+ type = v4hi_ftype_v4hi_int;
+ break;
+ case V4DF_FTYPE_V4DF_INT:
+ type = v4df_ftype_v4df_int;
+ break;
+ case V4SF_FTYPE_V4SF_INT:
+ type = v4sf_ftype_v4sf_int;
+ break;
+ case V4SF_FTYPE_V8SF_INT:
+ type = v4sf_ftype_v8sf_int;
+ break;
+ case V2DI_FTYPE_V2DI_INT:
+ case V2DI2TI_FTYPE_V2DI_INT:
+ type = v2di_ftype_v2di_int;
+ break;
+ case V2DF_FTYPE_V2DF_INT:
+ type = v2df_ftype_v2df_int;
+ break;
+ case V2DF_FTYPE_V4DF_INT:
+ type = v2df_ftype_v4df_int;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_V16QI:
+ type = v16qi_ftype_v16qi_v16qi_v16qi;
+ break;
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ type = v8sf_ftype_v8sf_v8sf_v8sf;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
+ type = v4df_ftype_v4df_v4df_v4df;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_V4SF:
+ type = v4sf_ftype_v4sf_v4sf_v4sf;
+ break;
+ case V2DF_FTYPE_V2DF_V2DF_V2DF:
+ type = v2df_ftype_v2df_v2df_v2df;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_INT:
+ type = v16qi_ftype_v16qi_v16qi_int;
+ break;
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ type = v8si_ftype_v8si_v8si_int;
+ break;
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ type = v8si_ftype_v8si_v4si_int;
+ break;
+ case V8HI_FTYPE_V8HI_V8HI_INT:
+ type = v8hi_ftype_v8hi_v8hi_int;
+ break;
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ type = v8sf_ftype_v8sf_v8sf_int;
+ break;
+ case V8SF_FTYPE_V8SF_V4SF_INT:
+ type = v8sf_ftype_v8sf_v4sf_int;
+ break;
+ case V4SI_FTYPE_V4SI_V4SI_INT:
+ type = v4si_ftype_v4si_v4si_int;
+ break;
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ type = v4df_ftype_v4df_v4df_int;
+ break;
+ case V4DF_FTYPE_V4DF_V2DF_INT:
+ type = v4df_ftype_v4df_v2df_int;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_INT:
+ type = v4sf_ftype_v4sf_v4sf_int;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_INT:
+ case V2DI2TI_FTYPE_V2DI_V2DI_INT:
+ type = v2di_ftype_v2di_v2di_int;
+ break;
+ case V2DF_FTYPE_V2DF_V2DF_INT:
+ type = v2df_ftype_v2df_v2df_int;
+ break;
+ case V2DI_FTYPE_V2DI_UINT_UINT:
+ type = v2di_ftype_v2di_unsigned_unsigned;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
+ type = v2di_ftype_v2di_v2di_unsigned_unsigned;
+ break;
+ case V1DI2DI_FTYPE_V1DI_V1DI_INT:
+ type = v1di_ftype_v1di_v1di_int;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin_const (d->mask, d->name, type, d->code);
+ }
+
+ /* pcmpestr[im] insns. */
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
+ else
+ ftype = int_ftype_v16qi_int_v16qi_int_int;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
+ /* pcmpistr[im] insns. */
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ {
+ if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ ftype = v16qi_ftype_v16qi_v16qi_int;
+ else
+ ftype = int_ftype_v16qi_v16qi_int;
+ def_builtin_const (d->mask, d->name, ftype, d->code);
+ }
+
+ /* comi/ucomi insns. */
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->mask == OPTION_MASK_ISA_SSE2)
+ def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
+ else
+ def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
+
+ /* SSE */
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
+
+ /* SSE or 3DNow!A */
+ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
+
+ /* SSE2 */
+ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
+
+ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
+ x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
+
+ /* SSE3. */
+ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
+ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
+
+ /* AES */
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
+
+ /* PCLMUL */
+ def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
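+ /* Illustrative only: the AES and PCLMUL builtins above back the
+    wmmintrin.h intrinsics (compile with -maes -mpclmul), e.g.
+
+      #include <wmmintrin.h>
+      __m128i round_step (__m128i s, __m128i k)
+      {
+        s = _mm_aesenc_si128 (s, k);               // __builtin_ia32_aesenc128
+        return _mm_clmulepi64_si128 (s, k, 0x00);  // __builtin_ia32_pclmulqdq128
+      }
+  */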
+
+ /* AVX */
+ def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
+ TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
+
+ /* Access to the vec_init patterns. */
+ ftype = build_function_type_list (V2SI_type_node, integer_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
+
+ ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node,
+ short_integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
+
+ ftype = build_function_type_list (V8QI_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, char_type_node,
+ char_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
+
+ /* Access to the vec_extract patterns. */
+ ftype = build_function_type_list (double_type_node, V2DF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
+
+ ftype = build_function_type_list (long_long_integer_type_node,
+ V2DI_type_node, integer_type_node,
+ NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
+
+ ftype = build_function_type_list (float_type_node, V4SF_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
+
+ ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
+
+ ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
+
+ ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
+
+ ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
+
+ ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
+
+ /* Access to the vec_set patterns. */
+ ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
+ intDI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
+
+ ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
+ float_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
+
+ ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
+ intSI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
+
+ ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
+
+ ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
+ intHI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
+
+ ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
+ intQI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
+
+ /* Add SSE5 multi-argument instructions.  */
+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
+ {
+ tree mtype = NULL_TREE;
+
+ if (d->name == 0)
+ continue;
+
+ switch ((enum multi_arg_type)d->flag)
+ {
+ case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
+ case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
+ case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
+ case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
+ case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
+ case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
+ case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
+ case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
+ case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
+ case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
+ case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
+ case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
+ case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
+ case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
+ case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
+ case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
+ case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
+ case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
+ case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
+ case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
+ case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
+ case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
+ case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
+ case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
+ case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
+ case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
+ case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
+ case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
+ case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
+ case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
+ case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
+ case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
+ case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
+ case MULTI_ARG_UNKNOWN:
+ default:
+ gcc_unreachable ();
+ }
+
+ if (mtype)
+ def_builtin_const (d->mask, d->name, mtype, d->code);
+ }
+}
+
+/* Internal method for ix86_init_builtins. */
+
+static void
+ix86_init_builtins_va_builtins_abi (void)
+{
+ tree ms_va_ref, sysv_va_ref;
+ tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
+ tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
+ tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
+ tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
+
+ if (!TARGET_64BIT)
+ return;
+ fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
+ fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
+ ms_va_ref = build_reference_type (ms_va_list_type_node);
+ sysv_va_ref =
+ build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
+
+ fnvoid_va_end_ms =
+ build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
+ fnvoid_va_start_ms =
+ build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
+ fnvoid_va_end_sysv =
+ build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
+ fnvoid_va_start_sysv =
+ build_varargs_function_type_list (void_type_node, sysv_va_ref,
+ NULL_TREE);
+ fnvoid_va_copy_ms =
+ build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
+ NULL_TREE);
+ fnvoid_va_copy_sysv =
+ build_function_type_list (void_type_node, sysv_va_ref,
+ sysv_va_ref, NULL_TREE);
+
+ add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
+ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
+ add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
+ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
+ add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
+ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
+ add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
+ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
+ add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
+ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
+ add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
+ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
+}
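+
+ /* Illustrative only, mirroring the GCC testsuite: on x86-64 these
+    builtins let a function walk the other ABI's variable arguments, e.g.
+
+      int __attribute__((ms_abi))
+      sum (int n, ...)
+      {
+        __builtin_ms_va_list ap;
+        int i, s = 0;
+        __builtin_ms_va_start (ap, n);
+        for (i = 0; i < n; i++)
+          s += __builtin_va_arg (ap, int);
+        __builtin_ms_va_end (ap);
+        return s;
+      }
+  */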
+
+static void
+ix86_init_builtins (void)
+{
+ tree float128_type_node = make_node (REAL_TYPE);
+ tree ftype, decl;
+
+ /* The __float80 type. */
+ if (TYPE_MODE (long_double_type_node) == XFmode)
+ (*lang_hooks.types.register_builtin_type) (long_double_type_node,
+ "__float80");
+ else
+ {
+ /* The __float80 type, distinct from long double.  */
+ tree float80_type_node = make_node (REAL_TYPE);
+
+ TYPE_PRECISION (float80_type_node) = 80;
+ layout_type (float80_type_node);
+ (*lang_hooks.types.register_builtin_type) (float80_type_node,
+ "__float80");
+ }
+
+ /* The __float128 type. */
+ TYPE_PRECISION (float128_type_node) = 128;
+ layout_type (float128_type_node);
+ (*lang_hooks.types.register_builtin_type) (float128_type_node,
+ "__float128");
+
+ /* TFmode support builtins. */
+ ftype = build_function_type (float128_type_node, void_list_node);
+ decl = add_builtin_function ("__builtin_infq", ftype,
+ IX86_BUILTIN_INFQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
+
+ /* We will expand them to a normal call if SSE2 isn't available, since
+ they are used by libgcc. */
+ ftype = build_function_type_list (float128_type_node,
+ float128_type_node,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_fabsq", ftype,
+ IX86_BUILTIN_FABSQ, BUILT_IN_MD,
+ "__fabstf2", NULL_TREE);
+ ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
+ TREE_READONLY (decl) = 1;
+
+ ftype = build_function_type_list (float128_type_node,
+ float128_type_node,
+ float128_type_node,
+ NULL_TREE);
+ decl = add_builtin_function ("__builtin_copysignq", ftype,
+ IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
+ "__copysigntf3", NULL_TREE);
+ ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
+ TREE_READONLY (decl) = 1;
+
+ ix86_init_mmx_sse_builtins ();
+ if (TARGET_64BIT)
+ ix86_init_builtins_va_builtins_abi ();
+}
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+ where we expect a vector. To avoid crashing, use one of the vector
+ clear instructions. */
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+ if (x == const0_rtx)
+ x = CONST0_RTX (mode);
+ return x;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of binop insns. */
+
+static rtx
+ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (GET_MODE (op1) == SImode && mode1 == TImode)
+ {
+ rtx x = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_loadd (x, op1));
+ op1 = gen_lowpart (TImode, x);
+ }
+
+ if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
+
+static rtx
+ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
+ enum multi_arg_type m_type,
+ enum insn_code sub_code)
+{
+ rtx pat;
+ int i;
+ int nargs;
+ bool comparison_p = false;
+ bool tf_p = false;
+ bool last_arg_constant = false;
+ int num_memory = 0;
+ struct {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+
+ switch (m_type)
+ {
+ case MULTI_ARG_3_SF:
+ case MULTI_ARG_3_DF:
+ case MULTI_ARG_3_DI:
+ case MULTI_ARG_3_SI:
+ case MULTI_ARG_3_SI_DI:
+ case MULTI_ARG_3_HI:
+ case MULTI_ARG_3_HI_SI:
+ case MULTI_ARG_3_QI:
+ case MULTI_ARG_3_PERMPS:
+ case MULTI_ARG_3_PERMPD:
+ nargs = 3;
+ break;
+
+ case MULTI_ARG_2_SF:
+ case MULTI_ARG_2_DF:
+ case MULTI_ARG_2_DI:
+ case MULTI_ARG_2_SI:
+ case MULTI_ARG_2_HI:
+ case MULTI_ARG_2_QI:
+ nargs = 2;
+ break;
+
+ case MULTI_ARG_2_DI_IMM:
+ case MULTI_ARG_2_SI_IMM:
+ case MULTI_ARG_2_HI_IMM:
+ case MULTI_ARG_2_QI_IMM:
+ nargs = 2;
+ last_arg_constant = true;
+ break;
+
+ case MULTI_ARG_1_SF:
+ case MULTI_ARG_1_DF:
+ case MULTI_ARG_1_DI:
+ case MULTI_ARG_1_SI:
+ case MULTI_ARG_1_HI:
+ case MULTI_ARG_1_QI:
+ case MULTI_ARG_1_SI_DI:
+ case MULTI_ARG_1_HI_DI:
+ case MULTI_ARG_1_HI_SI:
+ case MULTI_ARG_1_QI_DI:
+ case MULTI_ARG_1_QI_SI:
+ case MULTI_ARG_1_QI_HI:
+ case MULTI_ARG_1_PH2PS:
+ case MULTI_ARG_1_PS2PH:
+ nargs = 1;
+ break;
+
+ case MULTI_ARG_2_SF_CMP:
+ case MULTI_ARG_2_DF_CMP:
+ case MULTI_ARG_2_DI_CMP:
+ case MULTI_ARG_2_SI_CMP:
+ case MULTI_ARG_2_HI_CMP:
+ case MULTI_ARG_2_QI_CMP:
+ nargs = 2;
+ comparison_p = true;
+ break;
+
+ case MULTI_ARG_2_SF_TF:
+ case MULTI_ARG_2_DF_TF:
+ case MULTI_ARG_2_DI_TF:
+ case MULTI_ARG_2_SI_TF:
+ case MULTI_ARG_2_HI_TF:
+ case MULTI_ARG_2_QI_TF:
+ nargs = 2;
+ tf_p = true;
+ break;
+
+ case MULTI_ARG_UNKNOWN:
+ default:
+ gcc_unreachable ();
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ gcc_assert (nargs <= 4);
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ int adjust = (comparison_p) ? 1 : 0;
+ enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
+
+ if (last_arg_constant && i == nargs-1)
+ {
+ if (GET_CODE (op) != CONST_INT)
+ {
+ error ("last argument must be an immediate");
+ return gen_reg_rtx (tmode);
+ }
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ /* If we aren't optimizing, only allow one memory operand to be
+ generated. */
+ if (memory_operand (op, mode))
+ num_memory++;
+
+ gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
+
+ if (optimize
+ || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
+ || num_memory > 1)
+ op = force_reg (mode, op);
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+
+ case 2:
+ if (tf_p)
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
+ GEN_INT ((int)sub_code));
+ else if (! comparison_p)
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ else
+ {
+ rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
+ args[0].op,
+ args[1].op);
+
+ pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
+ }
+ break;
+
+ case 3:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
+ insns with vec_merge. */
+
+static rtx
+ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ rtx op1, op0 = expand_normal (arg0);
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ op1 = op0;
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
+ op1 = copy_to_mode_reg (mode0, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
+
+static rtx
+ix86_expand_sse_compare (const struct builtin_description *d,
+ tree exp, rtx target, bool swap)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (swap)
+ {
+ rtx tmp = gen_reg_rtx (mode1);
+ emit_move_insn (tmp, op1);
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ if (optimize || !target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
+ pat = GEN_FCN (d->icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of comi insns. */
+
+static rtx
+ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
+ {
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
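+
+ /* Illustrative only: this expander serves e.g. smmintrin.h's
+    _mm_testz_si128 (m, v), i.e. __builtin_ia32_ptestz128; the PTEST insn
+    sets the flags, and the code above materializes the requested flag
+    comparison into a 0/1 integer result.  */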
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ tree arg4 = CALL_EXPR_ARG (exp, 4);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ {
+ error ("the fifth argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
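+
+ /* Illustrative only: smmintrin.h's _mm_cmpestri (a, la, b, lb, IMM)
+    reaches this expander as __builtin_ia32_pcmpestri128.  IMM is the
+    fifth argument checked above, so passing a non-constant value
+    triggers the immediate-operand error.  */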
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ {
+ error ("the third argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of insns with
+ variable number of operands. */
+
+static rtx
+ix86_expand_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat, real_target;
+ unsigned int i, nargs;
+ unsigned int nargs_constant = 0;
+ int num_memory = 0;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+ bool last_arg_count = false;
+ enum insn_code icode = d->icode;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum machine_mode rmode = VOIDmode;
+ bool swap = false;
+ enum rtx_code comparison = d->comparison;
+
+ switch ((enum ix86_builtin_type) d->flag)
+ {
+ case INT_FTYPE_V8SF_V8SF_PTEST:
+ case INT_FTYPE_V4DI_V4DI_PTEST:
+ case INT_FTYPE_V4DF_V4DF_PTEST:
+ case INT_FTYPE_V4SF_V4SF_PTEST:
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ case INT_FTYPE_V2DF_V2DF_PTEST:
+ return ix86_expand_sse_ptest (d, exp, target);
+ case FLOAT128_FTYPE_FLOAT128:
+ case FLOAT_FTYPE_FLOAT:
+ case INT64_FTYPE_V4SF:
+ case INT64_FTYPE_V2DF:
+ case INT_FTYPE_V16QI:
+ case INT_FTYPE_V8QI:
+ case INT_FTYPE_V8SF:
+ case INT_FTYPE_V4DF:
+ case INT_FTYPE_V4SF:
+ case INT_FTYPE_V2DF:
+ case V16QI_FTYPE_V16QI:
+ case V8SI_FTYPE_V8SF:
+ case V8SI_FTYPE_V4SI:
+ case V8HI_FTYPE_V8HI:
+ case V8HI_FTYPE_V16QI:
+ case V8QI_FTYPE_V8QI:
+ case V8SF_FTYPE_V8SF:
+ case V8SF_FTYPE_V8SI:
+ case V8SF_FTYPE_V4SF:
+ case V4SI_FTYPE_V4SI:
+ case V4SI_FTYPE_V16QI:
+ case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8SI:
+ case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V4DF:
+ case V4SI_FTYPE_V2DF:
+ case V4HI_FTYPE_V4HI:
+ case V4DF_FTYPE_V4DF:
+ case V4DF_FTYPE_V4SI:
+ case V4DF_FTYPE_V4SF:
+ case V4DF_FTYPE_V2DF:
+ case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V8SF:
+ case V4SF_FTYPE_V4DF:
+ case V4SF_FTYPE_V2DF:
+ case V2DI_FTYPE_V2DI:
+ case V2DI_FTYPE_V16QI:
+ case V2DI_FTYPE_V8HI:
+ case V2DI_FTYPE_V4SI:
+ case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4DF:
+ case V2DF_FTYPE_V4SF:
+ case V2DF_FTYPE_V2SI:
+ case V2SI_FTYPE_V2SI:
+ case V2SI_FTYPE_V4SF:
+ case V2SI_FTYPE_V2SF:
+ case V2SI_FTYPE_V2DF:
+ case V2SF_FTYPE_V2SF:
+ case V2SF_FTYPE_V2SI:
+ nargs = 1;
+ break;
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
+ return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
+ case FLOAT128_FTYPE_FLOAT128_FLOAT128:
+ case V16QI_FTYPE_V16QI_V16QI:
+ case V16QI_FTYPE_V8HI_V8HI:
+ case V8QI_FTYPE_V8QI_V8QI:
+ case V8QI_FTYPE_V4HI_V4HI:
+ case V8HI_FTYPE_V8HI_V8HI:
+ case V8HI_FTYPE_V16QI_V16QI:
+ case V8HI_FTYPE_V4SI_V4SI:
+ case V8SF_FTYPE_V8SF_V8SF:
+ case V8SF_FTYPE_V8SF_V8SI:
+ case V4SI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V8HI_V8HI:
+ case V4SI_FTYPE_V4SF_V4SF:
+ case V4SI_FTYPE_V2DF_V2DF:
+ case V4HI_FTYPE_V4HI_V4HI:
+ case V4HI_FTYPE_V8QI_V8QI:
+ case V4HI_FTYPE_V2SI_V2SI:
+ case V4DF_FTYPE_V4DF_V4DF:
+ case V4DF_FTYPE_V4DF_V4DI:
+ case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V4SI:
+ case V4SF_FTYPE_V4SF_V2SI:
+ case V4SF_FTYPE_V4SF_V2DF:
+ case V4SF_FTYPE_V4SF_DI:
+ case V4SF_FTYPE_V4SF_SI:
+ case V2DI_FTYPE_V2DI_V2DI:
+ case V2DI_FTYPE_V16QI_V16QI:
+ case V2DI_FTYPE_V4SI_V4SI:
+ case V2DI_FTYPE_V2DI_V16QI:
+ case V2DI_FTYPE_V2DF_V2DF:
+ case V2SI_FTYPE_V2SI_V2SI:
+ case V2SI_FTYPE_V4HI_V4HI:
+ case V2SI_FTYPE_V2SF_V2SF:
+ case V2DF_FTYPE_V2DF_V2DF:
+ case V2DF_FTYPE_V2DF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DI:
+ case V2DF_FTYPE_V2DF_DI:
+ case V2DF_FTYPE_V2DF_SI:
+ case V2SF_FTYPE_V2SF_V2SF:
+ case V1DI_FTYPE_V1DI_V1DI:
+ case V1DI_FTYPE_V8QI_V8QI:
+ case V1DI_FTYPE_V2SI_V2SI:
+ if (comparison == UNKNOWN)
+ return ix86_expand_binop_builtin (icode, exp, target);
+ nargs = 2;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_SWAP:
+ case V2DF_FTYPE_V2DF_V2DF_SWAP:
+ gcc_assert (comparison != UNKNOWN);
+ nargs = 2;
+ swap = true;
+ break;
+ case V8HI_FTYPE_V8HI_V8HI_COUNT:
+ case V8HI_FTYPE_V8HI_SI_COUNT:
+ case V4SI_FTYPE_V4SI_V4SI_COUNT:
+ case V4SI_FTYPE_V4SI_SI_COUNT:
+ case V4HI_FTYPE_V4HI_V4HI_COUNT:
+ case V4HI_FTYPE_V4HI_SI_COUNT:
+ case V2DI_FTYPE_V2DI_V2DI_COUNT:
+ case V2DI_FTYPE_V2DI_SI_COUNT:
+ case V2SI_FTYPE_V2SI_V2SI_COUNT:
+ case V2SI_FTYPE_V2SI_SI_COUNT:
+ case V1DI_FTYPE_V1DI_V1DI_COUNT:
+ case V1DI_FTYPE_V1DI_SI_COUNT:
+ nargs = 2;
+ last_arg_count = true;
+ break;
+ case UINT64_FTYPE_UINT64_UINT64:
+ case UINT_FTYPE_UINT_UINT:
+ case UINT_FTYPE_UINT_USHORT:
+ case UINT_FTYPE_UINT_UCHAR:
+ nargs = 2;
+ break;
+ case V2DI2TI_FTYPE_V2DI_INT:
+ nargs = 2;
+ rmode = V2DImode;
+ nargs_constant = 1;
+ break;
+ case V8HI_FTYPE_V8HI_INT:
+ case V8SF_FTYPE_V8SF_INT:
+ case V4SI_FTYPE_V4SI_INT:
+ case V4SI_FTYPE_V8SI_INT:
+ case V4HI_FTYPE_V4HI_INT:
+ case V4DF_FTYPE_V4DF_INT:
+ case V4SF_FTYPE_V4SF_INT:
+ case V4SF_FTYPE_V8SF_INT:
+ case V2DI_FTYPE_V2DI_INT:
+ case V2DF_FTYPE_V2DF_INT:
+ case V2DF_FTYPE_V4DF_INT:
+ nargs = 2;
+ nargs_constant = 1;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_V16QI:
+ case V8SF_FTYPE_V8SF_V8SF_V8SF:
+ case V4DF_FTYPE_V4DF_V4DF_V4DF:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF:
+ nargs = 3;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_INT:
+ case V8HI_FTYPE_V8HI_V8HI_INT:
+ case V8SI_FTYPE_V8SI_V8SI_INT:
+ case V8SI_FTYPE_V8SI_V4SI_INT:
+ case V8SF_FTYPE_V8SF_V8SF_INT:
+ case V8SF_FTYPE_V8SF_V4SF_INT:
+ case V4SI_FTYPE_V4SI_V4SI_INT:
+ case V4DF_FTYPE_V4DF_V4DF_INT:
+ case V4DF_FTYPE_V4DF_V2DF_INT:
+ case V4SF_FTYPE_V4SF_V4SF_INT:
+ case V2DI_FTYPE_V2DI_V2DI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT:
+ nargs = 3;
+ nargs_constant = 1;
+ break;
+ case V2DI2TI_FTYPE_V2DI_V2DI_INT:
+ nargs = 3;
+ rmode = V2DImode;
+ nargs_constant = 1;
+ break;
+ case V1DI2DI_FTYPE_V1DI_V1DI_INT:
+ nargs = 3;
+ rmode = DImode;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_UINT_UINT:
+ nargs = 3;
+ nargs_constant = 2;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
+ nargs = 4;
+ nargs_constant = 2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (comparison != UNKNOWN)
+ {
+ gcc_assert (nargs == 2);
+ return ix86_expand_sse_compare (d, exp, target, swap);
+ }
+
+ if (rmode == VOIDmode || rmode == tmode)
+ {
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_p->operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ real_target = target;
+ }
+ else
+ {
+ target = gen_reg_rtx (rmode);
+ real_target = simplify_gen_subreg (tmode, target, rmode, 0);
+ }
+
+ for (i = 0; i < nargs; i++)
+ {
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
+
+ if (last_arg_count && (i + 1) == nargs)
+ {
+ /* SIMD shift insns take either an 8-bit immediate or a
+ register as the count, but the builtin functions take an int
+ as the count.  If the count doesn't match, we put it in a register. */
+ if (!match)
+ {
+ op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
+ if (!(*insn_p->operand[i + 1].predicate) (op, mode))
+ op = copy_to_reg (op);
+ }
+ }
+ else if ((nargs - i) <= nargs_constant)
+ {
+ if (!match)
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_roundpd:
+ case CODE_FOR_sse4_1_roundps:
+ case CODE_FOR_sse4_1_roundsd:
+ case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
+ case CODE_FOR_avx_blendpd256:
+ case CODE_FOR_avx_vpermilv4df:
+ case CODE_FOR_avx_roundpd256:
+ case CODE_FOR_avx_roundps256:
+ error ("the last argument must be a 4-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_sse4_1_blendpd:
+ case CODE_FOR_avx_vpermilv2df:
+ error ("the last argument must be a 2-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_avx_vextractf128v4df:
+ case CODE_FOR_avx_vextractf128v8sf:
+ case CODE_FOR_avx_vextractf128v8si:
+ case CODE_FOR_avx_vinsertf128v4df:
+ case CODE_FOR_avx_vinsertf128v8sf:
+ case CODE_FOR_avx_vinsertf128v8si:
+ error ("the last argument must be a 1-bit immediate");
+ return const0_rtx;
+
+ case CODE_FOR_avx_cmpsdv2df3:
+ case CODE_FOR_avx_cmpssv4sf3:
+ case CODE_FOR_avx_cmppdv2df3:
+ case CODE_FOR_avx_cmppsv4sf3:
+ case CODE_FOR_avx_cmppdv4df3:
+ case CODE_FOR_avx_cmppsv8sf3:
+ error ("the last argument must be a 5-bit immediate");
+ return const0_rtx;
+
+ default:
+ switch (nargs_constant)
+ {
+ case 2:
+ if ((nargs - i) == nargs_constant)
+ {
+ error ("the next to last argument must be an 8-bit immediate");
+ break;
+ }
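+ /* FALLTHRU */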
+ case 1:
+ error ("the last argument must be an 8-bit immediate");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ /* If we aren't optimizing, only allow one memory operand to
+ be generated. */
+ if (memory_operand (op, mode))
+ num_memory++;
+
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ {
+ if (optimize || !match || num_memory > 1)
+ op = copy_to_mode_reg (mode, op);
+ }
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (real_target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of special insns
+ with variable number of operands. */
+
+static rtx
+ix86_expand_special_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ tree arg;
+ rtx pat, op;
+ unsigned int i, nargs, arg_adjust, memory;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[2];
+ enum insn_code icode = d->icode;
+ bool last_arg_constant = false;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum { load, store } klass;
+
+ switch ((enum ix86_special_builtin_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ emit_insn (GEN_FCN (icode) (target));
+ return 0;
+ case V2DI_FTYPE_PV2DI:
+ case V32QI_FTYPE_PCCHAR:
+ case V16QI_FTYPE_PCCHAR:
+ case V8SF_FTYPE_PCV4SF:
+ case V8SF_FTYPE_PCFLOAT:
+ case V4SF_FTYPE_PCFLOAT:
+ case V4DF_FTYPE_PCV2DF:
+ case V4DF_FTYPE_PCDOUBLE:
+ case V2DF_FTYPE_PCDOUBLE:
+ nargs = 1;
+ klass = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV4DI_V4DI:
+ case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V32QI:
+ case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V8SF:
+ case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V4DF:
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PDI_DI:
+ case VOID_FTYPE_PINT_INT:
+ nargs = 1;
+ klass = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ nargs = 2;
+ klass = load;
+ memory = 1;
+ break;
+ case V8SF_FTYPE_PCV8SF_V8SF:
+ case V4DF_FTYPE_PCV4DF_V4DF:
+ case V4SF_FTYPE_PCV4SF_V4SF:
+ case V2DF_FTYPE_PCV2DF_V2DF:
+ nargs = 2;
+ klass = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV8SF_V8SF_V8SF:
+ case VOID_FTYPE_PV4DF_V4DF_V4DF:
+ case VOID_FTYPE_PV4SF_V4SF_V4SF:
+ case VOID_FTYPE_PV2DF_V2DF_V2DF:
+ nargs = 2;
+ klass = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (nargs <= ARRAY_SIZE (args));
+
+ if (klass == store)
+ {
+ arg = CALL_EXPR_ARG (exp, 0);
+ op = expand_normal (arg);
+ gcc_assert (target == 0);
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ arg_adjust = 1;
+ }
+ else
+ {
+ arg_adjust = 0;
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_p->operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ }
+
+ for (i = 0; i < nargs; i++)
+ {
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match;
+
+ arg = CALL_EXPR_ARG (exp, i + arg_adjust);
+ op = expand_normal (arg);
+ match = (*insn_p->operand[i + 1].predicate) (op, mode);
+
+ if (last_arg_constant && (i + 1) == nargs)
+ {
+ if (!match)
+ switch (icode)
+ {
+ default:
+ error ("the last argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (i == memory)
+ {
+ /* This must be the memory operand. */
+ op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ }
+ else
+ {
+ /* This must be a register. */
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
+
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ op = copy_to_mode_reg (mode, op);
+ }
+ }
+
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return klass == store ? 0 : target;
+}
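+
+ /* Illustrative only: the load class covers e.g. _mm_loadu_pd (p),
+    which in this GCC version expands to __builtin_ia32_loadupd (p)
+    (V2DF_FTYPE_PCDOUBLE above); the store class covers e.g.
+    _mm_storeu_pd (p, a) via __builtin_ia32_storeupd, where the memory
+    operand becomes the insn's target.  */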
+
+/* Return the integer constant in ARG. Constrain it to be in the range
+ of the subparts of VEC_TYPE; issue an error if not. */
+
+static int
+get_element_number (tree vec_type, tree arg)
+{
+ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
+
+ if (!host_integerp (arg, 1)
+ || (elt = tree_low_cst (arg, 1), elt > max))
+ {
+ error ("selector must be an integer constant in the range 0..%wi", max);
+ return 0;
+ }
+
+ return elt;
+}
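+
+ /* Illustrative only: for a V4SF vector TYPE_VECTOR_SUBPARTS is 4, so
+    __builtin_ia32_vec_ext_v4sf (x, 5) is rejected with
+    "selector must be an integer constant in the range 0..3".  */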
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_init. We DO have language-level syntax for this, in
+ the form of (type){ init-list }. Except that, since we can't place emms
+ instructions from inside the compiler, we can't allow the use of MMX
+ registers unless the user explicitly asks for it. So we do *not* define
+ vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
+ we have builtins invoked by mmintrin.h that give us license to emit
+ these sorts of instructions. */
+
+static rtx
+ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
+{
+ enum machine_mode tmode = TYPE_MODE (type);
+ enum machine_mode inner_mode = GET_MODE_INNER (tmode);
+ int i, n_elt = GET_MODE_NUNITS (tmode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ gcc_assert (VECTOR_MODE_P (tmode));
+ gcc_assert (call_expr_nargs (exp) == n_elt);
+
+ for (i = 0; i < n_elt; ++i)
+ {
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
+ }
+
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
+ return target;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
+ had a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_ext_builtin (tree exp, rtx target)
+{
+ enum machine_mode tmode, mode0;
+ tree arg0, arg1;
+ int elt;
+ rtx op0;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_normal (arg0);
+ elt = get_element_number (TREE_TYPE (arg0), arg1);
+
+ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ mode0 = TYPE_MODE (TREE_TYPE (arg0));
+ gcc_assert (VECTOR_MODE_P (mode0));
+
+ op0 = force_reg (mode0, op0);
+
+ if (optimize || !target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ ix86_expand_vector_extract (true, target, op0, elt);
+
+ return target;
+}
+
+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
+ ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
+ a language-level syntax for referencing vector elements. */
+
+static rtx
+ix86_expand_vec_set_builtin (tree exp)
+{
+ enum machine_mode tmode, mode1;
+ tree arg0, arg1, arg2;
+ int elt;
+ rtx op0, op1, target;
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+
+ tmode = TYPE_MODE (TREE_TYPE (arg0));
+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
+ gcc_assert (VECTOR_MODE_P (tmode));
+
+ op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
+ op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
+ elt = get_element_number (TREE_TYPE (arg0), arg2);
+
+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
+
+ op0 = force_reg (tmode, op0);
+ op1 = force_reg (mode1, op1);
+
+ /* OP0 is the source of these builtin functions and shouldn't be
+ modified. Create a copy, use it and return it as target. */
+ target = gen_reg_rtx (tmode);
+ emit_move_insn (target, op0);
+ ix86_expand_vector_set (true, target, op1, elt);
+
+ return target;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ const struct builtin_description *d;
+ size_t i;
+ enum insn_code icode;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0, arg1, arg2;
+ rtx op0, op1, op2, pat;
+ enum machine_mode mode0, mode1, mode2;
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ /* Determine whether the builtin function is available under the current ISA.
+ Originally the builtin was not created if it wasn't applicable to the
+ current ISA based on the command line switches. With function specific
+ options, we need to check in the context of the function making the call
+ whether it is supported. */
+ if (ix86_builtins_isa[fcode].isa
+ && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
+ {
+ char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
+ NULL, NULL, false);
+
+ if (!opts)
+ error ("%qE needs unknown isa option", fndecl);
+ else
+ {
+ gcc_assert (opts != NULL);
+ error ("%qE needs isa option %s", fndecl, opts);
+ free (opts);
+ }
+ return const0_rtx;
+ }
+
+ switch (fcode)
+ {
+ case IX86_BUILTIN_MASKMOVQ:
+ case IX86_BUILTIN_MASKMOVDQU:
+ icode = (fcode == IX86_BUILTIN_MASKMOVQ
+ ? CODE_FOR_mmx_maskmovq
+ : CODE_FOR_sse2_maskmovdqu);
+ /* Note the arg order is different from the operand order. */
+ arg1 = CALL_EXPR_ARG (exp, 0);
+ arg2 = CALL_EXPR_ARG (exp, 1);
+ arg0 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+
+ op0 = force_reg (Pmode, op0);
+ op0 = gen_rtx_MEM (mode1, op0);
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ pat = GEN_FCN (icode) (op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return 0;
+
+ case IX86_BUILTIN_LDMXCSR:
+ op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_move_insn (target, op0);
+ emit_insn (gen_sse_ldmxcsr (target));
+ return 0;
+
+ case IX86_BUILTIN_STMXCSR:
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ emit_insn (gen_sse_stmxcsr (target));
+ return copy_to_mode_reg (SImode, target);
+
+ case IX86_BUILTIN_CLFLUSH:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_normal (arg0);
+ icode = CODE_FOR_sse2_clflush;
+ if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
+ op0 = copy_to_mode_reg (Pmode, op0);
+
+ emit_insn (gen_sse2_clflush (op0));
+ return 0;
+
+ case IX86_BUILTIN_MONITOR:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (Pmode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ if (!REG_P (op2))
+ op2 = copy_to_mode_reg (SImode, op2);
+ emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
+ return 0;
+
+ case IX86_BUILTIN_MWAIT:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ emit_insn (gen_sse3_mwait (op0, op1));
+ return 0;
+
+ case IX86_BUILTIN_VEC_INIT_V2SI:
+ case IX86_BUILTIN_VEC_INIT_V4HI:
+ case IX86_BUILTIN_VEC_INIT_V8QI:
+ return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
+
+ case IX86_BUILTIN_VEC_EXT_V2DF:
+ case IX86_BUILTIN_VEC_EXT_V2DI:
+ case IX86_BUILTIN_VEC_EXT_V4SF:
+ case IX86_BUILTIN_VEC_EXT_V4SI:
+ case IX86_BUILTIN_VEC_EXT_V8HI:
+ case IX86_BUILTIN_VEC_EXT_V2SI:
+ case IX86_BUILTIN_VEC_EXT_V4HI:
+ case IX86_BUILTIN_VEC_EXT_V16QI:
+ return ix86_expand_vec_ext_builtin (exp, target);
+
+ case IX86_BUILTIN_VEC_SET_V2DI:
+ case IX86_BUILTIN_VEC_SET_V4SF:
+ case IX86_BUILTIN_VEC_SET_V4SI:
+ case IX86_BUILTIN_VEC_SET_V8HI:
+ case IX86_BUILTIN_VEC_SET_V4HI:
+ case IX86_BUILTIN_VEC_SET_V16QI:
+ return ix86_expand_vec_set_builtin (exp);
+
+ case IX86_BUILTIN_INFQ:
+ {
+ REAL_VALUE_TYPE inf;
+ rtx tmp;
+
+ real_inf (&inf);
+ tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
+
+ tmp = validize_mem (force_const_mem (mode, tmp));
+
+ if (target == 0)
+ target = gen_reg_rtx (mode);
+
+ emit_move_insn (target, tmp);
+ return target;
+ }
+
+ default:
+ break;
+ }
+
+ for (i = 0, d = bdesc_special_args;
+ i < ARRAY_SIZE (bdesc_special_args);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_special_args_builtin (d, exp, target);
+
+ for (i = 0, d = bdesc_args;
+ i < ARRAY_SIZE (bdesc_args);
+ i++, d++)
+ if (d->code == fcode)
+ switch (fcode)
+ {
+ case IX86_BUILTIN_FABSQ:
+ case IX86_BUILTIN_COPYSIGNQ:
+ if (!TARGET_SSE2)
+ /* Emit a normal call if SSE2 isn't available. */
+ return expand_call (exp, target, ignore);
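+ /* FALLTHRU */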
+ default:
+ return ix86_expand_args_builtin (d, exp, target);
+ }
+
+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_comi (d, exp, target);
+
+ for (i = 0, d = bdesc_pcmpestr;
+ i < ARRAY_SIZE (bdesc_pcmpestr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpestr (d, exp, target);
+
+ for (i = 0, d = bdesc_pcmpistr;
+ i < ARRAY_SIZE (bdesc_pcmpistr);
+ i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_sse_pcmpistr (d, exp, target);
+
+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
+ if (d->code == fcode)
+ return ix86_expand_multi_arg_builtin (d->icode, exp, target,
+ (enum multi_arg_type)d->flag,
+ d->comparison);
+
+ gcc_unreachable ();
+}
+
+/* Returns a function decl for a vectorized version of the builtin function
+ with builtin function code FN and the result vector type TYPE, or NULL_TREE
+ if it is not available. */
+
+static tree
+ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
+ tree type_in)
+{
+ enum machine_mode in_mode, out_mode;
+ int in_n, out_n;
+
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE)
+ return NULL_TREE;
+
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+ switch (fn)
+ {
+ case BUILT_IN_SQRT:
+ if (out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_SQRTPD];
+ break;
+
+ case BUILT_IN_SQRTF:
+ if (out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+ break;
+
+ case BUILT_IN_LRINT:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ break;
+
+ case BUILT_IN_LRINTF:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+ break;
+
+ default:
+ ;
+ }
+
+ /* Dispatch to a handler for a vectorization library. */
+ if (ix86_veclib_handler)
+ return (*ix86_veclib_handler)(fn, type_out, type_in);
+
+ return NULL_TREE;
+}
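+
+ /* Illustrative only: the auto-vectorizer (e.g. at -O3 with SSE2
+    enabled) queries this hook, so a loop such as
+
+      for (i = 0; i < n; i++)
+        a[i] = sqrt (b[i]);   // a, b arrays of double
+
+    can be rewritten in terms of IX86_BUILTIN_SQRTPD, two elements per
+    SQRTPD insn.  */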
+
+/* Handler for an SVML-style interface to
+ a library with vectorized intrinsics. */
+
+static tree
+ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
+{
+ char name[20];
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+ /* The SVML library is suitable for unsafe math only. */
+ if (!flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG10:
+ case BUILT_IN_POW:
+ case BUILT_IN_TANH:
+ case BUILT_IN_TAN:
+ case BUILT_IN_ATAN:
+ case BUILT_IN_ATAN2:
+ case BUILT_IN_ATANH:
+ case BUILT_IN_CBRT:
+ case BUILT_IN_SINH:
+ case BUILT_IN_SIN:
+ case BUILT_IN_ASINH:
+ case BUILT_IN_ASIN:
+ case BUILT_IN_COSH:
+ case BUILT_IN_COS:
+ case BUILT_IN_ACOSH:
+ case BUILT_IN_ACOS:
+ if (el_mode != DFmode || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_EXPF:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG10F:
+ case BUILT_IN_POWF:
+ case BUILT_IN_TANHF:
+ case BUILT_IN_TANF:
+ case BUILT_IN_ATANF:
+ case BUILT_IN_ATAN2F:
+ case BUILT_IN_ATANHF:
+ case BUILT_IN_CBRTF:
+ case BUILT_IN_SINHF:
+ case BUILT_IN_SINF:
+ case BUILT_IN_ASINHF:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_COSHF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_ACOSHF:
+ case BUILT_IN_ACOSF:
+ if (el_mode != SFmode || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+
+ if (fn == BUILT_IN_LOGF)
+ strcpy (name, "vmlsLn4");
+ else if (fn == BUILT_IN_LOG)
+ strcpy (name, "vmldLn2");
+ else if (n == 4)
+ {
+ sprintf (name, "vmls%s", bname+10);
+ name[strlen (name)-1] = '4';
+ }
+ else
+ sprintf (name, "vmld%s2", bname+10);
+
+ /* Convert to uppercase. */
+ name[4] &= ~0x20;
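+
+ /* For example (bname + 10 skips the "__builtin_" prefix above):
+ BUILT_IN_SINF yields "vmlsSin4", BUILT_IN_SIN yields "vmldSin2",
+ and log is special-cased to "vmlsLn4"/"vmldLn2". */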
+
+ arity = 0;
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++;
+
+ if (arity == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
+
+/* Handler for an ACML-style interface to
+ a library with vectorized intrinsics. */
+
+static tree
+ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
+{
+ char name[20] = "__vr.._";
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+ /* ACML is 64-bit only and suitable for unsafe math only, as it does
+ not correctly support parts of IEEE (such as denormals) with the
+ required precision. */
+ if (!TARGET_64BIT
+ || !flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_SIN:
+ case BUILT_IN_COS:
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG2:
+ case BUILT_IN_LOG10:
+ name[4] = 'd';
+ name[5] = '2';
+ if (el_mode != DFmode
+ || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_SINF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_EXPF:
+ case BUILT_IN_POWF:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG2F:
+ case BUILT_IN_LOG10F:
+ name[4] = 's';
+ name[5] = '4';
+ if (el_mode != SFmode
+ || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+ sprintf (name + 7, "%s", bname+10);
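+
+ /* For example (bname + 10 skips the "__builtin_" prefix):
+ BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF yields
+ "__vrs4_sinf". */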
+
+ arity = 0;
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++;
+
+ if (arity == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
+
+
+/* Returns a decl of a function that implements conversion of an integer
+ vector into a floating-point vector, or vice versa. TYPE is the type of
+ the integer side of the conversion.
+ Return NULL_TREE if it is not available. */
+
+static tree
+ix86_vectorize_builtin_conversion (unsigned int code, tree type)
+{
+ if (TREE_CODE (type) != VECTOR_TYPE)
+ return NULL_TREE;
+
+ switch (code)
+ {
+ case FLOAT_EXPR:
+ switch (TYPE_MODE (type))
+ {
+ case V4SImode:
+ return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
+ default:
+ return NULL_TREE;
+ }
+
+ case FIX_TRUNC_EXPR:
+ switch (TYPE_MODE (type))
+ {
+ case V4SImode:
+ return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
+ default:
+ return NULL_TREE;
+ }
+ default:
+ return NULL_TREE;
+
+ }
+}
+
+/* Returns the decl of a target-specific builtin that implements the
+ reciprocal of the function, or NULL_TREE if not available. */
+
+static tree
+ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
+ bool sqrt ATTRIBUTE_UNUSED)
+{
+ if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations))
+ return NULL_TREE;
+
+ if (md_fn)
+ /* Machine dependent builtins. */
+ switch (fn)
+ {
+ /* Vectorized version of sqrt to rsqrt conversion. */
+ case IX86_BUILTIN_SQRTPS_NR:
+ return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
+
+ default:
+ return NULL_TREE;
+ }
+ else
+ /* Normal builtins. */
+ switch (fn)
+ {
+ /* Sqrt to rsqrt conversion. */
+ case BUILT_IN_SQRTF:
+ return ix86_builtins[IX86_BUILTIN_RSQRTF];
+
+ default:
+ return NULL_TREE;
+ }
+}
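+
+/* For example, with -mrecip and -ffast-math (which satisfy the gates
+ tested above), BUILT_IN_SQRTF resolves to the IX86_BUILTIN_RSQRTF decl,
+ so a reciprocal square root can use the rsqrtss-based sequence instead
+ of a division and a sqrtss. */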
+
+/* Store OPERAND to memory after reload has completed. This means
+ that we can't easily use assign_stack_local. */
+rtx
+ix86_force_to_memory (enum machine_mode mode, rtx operand)
+{
+ rtx result;
+
+ gcc_assert (reload_completed);
+ if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
+ {
+ result = gen_rtx_MEM (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-RED_ZONE_SIZE)));
+ emit_move_insn (result, operand);
+ }
+ else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
+ {
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ operand = gen_lowpart (DImode, operand);
+ /* FALLTHRU */
+ case DImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DImode,
+ gen_rtx_PRE_DEC (DImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ else
+ {
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx operands[2];
+ split_di (&operand, 1, operands, operands + 1);
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[1]));
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[0]));
+ }
+ break;
+ case HImode:
+ /* Store HImodes as SImodes. */
+ operand = gen_lowpart (SImode, operand);
+ /* FALLTHRU */
+ case SImode:
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (operand),
+ gen_rtx_PRE_DEC (SImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ return result;
+}
+
+/* Free the stack slot allocated by ix86_force_to_memory. */
+void
+ix86_free_from_memory (enum machine_mode mode)
+{
+ if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
+ {
+ int size;
+
+ if (mode == DImode || TARGET_64BIT)
+ size = 8;
+ else
+ size = 4;
+ /* Use LEA to deallocate stack space. In peephole2 it will be converted
+ to pop or add instruction if registers are available. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (size))));
+ }
+}
+
+/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
+ QImode must go into class Q_REGS.
+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
+ movdf to do mem-to-mem moves through integer regs. */
+enum reg_class
+ix86_preferred_reload_class (rtx x, enum reg_class regclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* We're only allowed to return a subclass of CLASS. Many of the
+ following checks fail for NO_REGS, so eliminate that early. */
+ if (regclass == NO_REGS)
+ return NO_REGS;
+
+ /* All classes can load zeros. */
+ if (x == CONST0_RTX (mode))
+ return regclass;
+
+ /* Force constants into memory if we are loading a (nonzero) constant into
+ an MMX or SSE register. This is because there are no MMX/SSE instructions
+ to load from a constant. */
+ if (CONSTANT_P (x)
+ && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
+ return NO_REGS;
+
+ /* Prefer SSE regs only, if we can use them for math. */
+ if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
+ return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
+
+ /* Floating-point constants need more complex checks. */
+ if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
+ {
+ /* General regs can load everything. */
+ if (reg_class_subset_p (regclass, GENERAL_REGS))
+ return regclass;
+
+ /* Floats can load 0 and 1 plus some others. Note that we eliminated
+ zero above. We only want to wind up preferring 80387 registers if
+ we plan on doing computation with them. */
+ if (TARGET_80387
+ && standard_80387_constant_p (x))
+ {
+ /* Limit class to non-sse. */
+ if (regclass == FLOAT_SSE_REGS)
+ return FLOAT_REGS;
+ if (regclass == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ if (regclass == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
+ return regclass;
+ }
+
+ return NO_REGS;
+ }
+
+ /* Generally when we see PLUS here, it's the function invariant
+ (plus soft-fp const_int). Which can only be computed into general
+ regs. */
+ if (GET_CODE (x) == PLUS)
+ return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
+
+ /* QImode constants are easy to load, but non-constant QImode data
+ must go into Q_REGS. */
+ if (GET_MODE (x) == QImode && !CONSTANT_P (x))
+ {
+ if (reg_class_subset_p (regclass, Q_REGS))
+ return regclass;
+ if (reg_class_subset_p (Q_REGS, regclass))
+ return Q_REGS;
+ return NO_REGS;
+ }
+
+ return regclass;
+}
+
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+enum reg_class
+ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* Restrict the output reload class to the register bank that we are doing
+ math on. If we would like not to return a subset of CLASS, reject this
+ alternative: if reload cannot do this, it will still use its choice. */
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
+
+ if (X87_FLOAT_MODE_P (mode))
+ {
+ if (regclass == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ else if (regclass == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ else
+ return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
+ }
+
+ return regclass;
+}
+
+static enum reg_class
+ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
+ enum machine_mode mode,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+ /* QImode spills from non-QI registers require an
+ intermediate register on 32-bit targets. */
+ if (!in_p && mode == QImode && !TARGET_64BIT
+ && (rclass == GENERAL_REGS
+ || rclass == LEGACY_REGS
+ || rclass == INDEX_REGS))
+ {
+ int regno;
+
+ if (REG_P (x))
+ regno = REGNO (x);
+ else
+ regno = -1;
+
+ if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
+ regno = true_regnum (x);
+
+ /* Return Q_REGS if the operand is in memory. */
+ if (regno == -1)
+ return Q_REGS;
+ }
+
+ return NO_REGS;
+}
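+
+/* For example, on a 32-bit target a QImode value that reload holds in
+ %esi or %edi cannot be stored to memory directly, since those registers
+ have no byte parts; the hook above makes reload stage the store through
+ a Q_REGS register such as %eax. */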
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers.
+
+ To optimize register_move_cost performance, we allow the inline variant.
+
+ This check can't work reliably when one of the CLASSES is a class
+ containing registers from multiple units (SSE, MMX, integer). We avoid
+ this by never combining those units in a single alternative in the
+ machine description. Ensure that this constraint holds to avoid
+ unexpected surprises.
+
+ When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
+ not enforce these sanity checks. */
+
+static inline int
+inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+ enum machine_mode mode, int strict)
+{
+ if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
+ || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
+ || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
+ || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
+ || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
+ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
+ {
+ gcc_assert (!strict);
+ return true;
+ }
+
+ if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
+ return true;
+
+ /* ??? This is a lie. We do have moves between mmx/general, and for
+ mmx/sse2. But by saying we need secondary memory we discourage the
+ register allocator from using the mmx registers unless needed. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
+ return true;
+
+ if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+ {
+ /* SSE1 doesn't have any direct moves from other classes. */
+ if (!TARGET_SSE2)
+ return true;
+
+ /* If the target says that inter-unit moves are more expensive
+ than moving through memory, then don't generate them. */
+ if (!TARGET_INTER_UNIT_MOVES)
+ return true;
+
+ /* Between SSE and general, we have moves no larger than word size. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return true;
+ }
+
+ return false;
+}
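+
+/* For example, even with SSE2 and inter-unit moves enabled, a DImode move
+ between SSE_REGS and GENERAL_REGS on a 32-bit target must go through
+ memory, because GET_MODE_SIZE (DImode) exceeds UNITS_PER_WORD there. */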
+
+int
+ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
+ enum machine_mode mode, int strict)
+{
+ return inline_secondary_memory_needed (class1, class2, mode, strict);
+}
+
+/* Return true if the registers in CLASS cannot represent the change from
+ modes FROM to TO. */
+
+bool
+ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class regclass)
+{
+ if (from == to)
+ return false;
+
+ /* x87 registers can't do subreg at all, as all values are reformatted
+ to extended precision. */
+ if (MAYBE_FLOAT_CLASS_P (regclass))
+ return true;
+
+ if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
+ {
+ /* Vector registers do not support QI or HImode loads. If we don't
+ disallow a change to these modes, reload will assume it's ok to
+ drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
+ the vec_dupv4hi pattern. */
+ if (GET_MODE_SIZE (from) < 4)
+ return true;
+
+ /* Vector registers do not support subreg with nonzero offsets, which
+ are otherwise valid for integer registers. Since we can't see
+ whether we have a nonzero offset from here, prohibit all
+ nonparadoxical subregs changing size. */
+ if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return the cost of moving data of mode MODE between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ This function is used extensively by register_move_cost, which is
+ used to build tables at startup. Make it inline in that case.
+ When IN is 2, return the maximum of the in and out move costs.
+
+ If moving between registers and memory is more expensive than
+ between two registers, you should define this macro to express the
+ relative cost.
+
+ Also model the increased cost of moving QImode registers in
+ non-Q_REGS classes. */
+static inline int
+inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
+ int in)
+{
+ int cost;
+ if (FLOAT_CLASS_P (regclass))
+ {
+ int index;
+ switch (mode)
+ {
+ case SFmode:
+ index = 0;
+ break;
+ case DFmode:
+ index = 1;
+ break;
+ case XFmode:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ if (in == 2)
+ return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
+ return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
+ }
+ if (SSE_CLASS_P (regclass))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ case 16:
+ index = 2;
+ break;
+ default:
+ return 100;
+ }
+ if (in == 2)
+ return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
+ return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
+ }
+ if (MMX_CLASS_P (regclass))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 4:
+ index = 0;
+ break;
+ case 8:
+ index = 1;
+ break;
+ default:
+ return 100;
+ }
+ if (in == 2)
+ return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
+ return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
+ }
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ if (Q_CLASS_P (regclass) || TARGET_64BIT)
+ {
+ if (!in)
+ return ix86_cost->int_store[0];
+ if (TARGET_PARTIAL_REG_DEPENDENCY
+ && optimize_function_for_speed_p (cfun))
+ cost = ix86_cost->movzbl_load;
+ else
+ cost = ix86_cost->int_load[0];
+ if (in == 2)
+ return MAX (cost, ix86_cost->int_store[0]);
+ return cost;
+ }
+ else
+ {
+ if (in == 2)
+ return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
+ if (in)
+ return ix86_cost->movzbl_load;
+ else
+ return ix86_cost->int_store[0] + 4;
+ }
+ break;
+ case 2:
+ if (in == 2)
+ return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
+ return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
+ default:
+ /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
+ if (mode == TFmode)
+ mode = XFmode;
+ if (in == 2)
+ cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
+ else if (in)
+ cost = ix86_cost->int_load[2];
+ else
+ cost = ix86_cost->int_store[2];
+ return (cost * (((int) GET_MODE_SIZE (mode)
+ + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
+ }
+}
+
+int
+ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
+{
+ return inline_memory_move_cost (mode, regclass, in);
+}
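+
+/* For example, inline_memory_move_cost (SFmode, FLOAT_REGS, 1) returns
+ ix86_cost->fp_load[0], while IN == 2 returns the maximum of
+ fp_load[0] and fp_store[0], the symmetric cost that
+ ix86_register_move_cost below relies on. */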
+
+
+/* Return the cost of moving data from a register in class CLASS1 to
+ one in class CLASS2.
+
+ It is not required that the cost always equal 2 when FROM is the same as TO;
+ on some machines it is expensive to move between registers if they are not
+ general registers. */
+
+int
+ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
+ enum reg_class class2)
+{
+ /* In case we require secondary memory, compute cost of the store followed
+ by load. In order to avoid bad register allocation choices, we need
+ for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
+
+ if (inline_secondary_memory_needed (class1, class2, mode, 0))
+ {
+ int cost = 1;
+
+ cost += inline_memory_move_cost (mode, class1, 2);
+ cost += inline_memory_move_cost (mode, class2, 2);
+
+ /* When copying from a general purpose register we may emit multiple
+ stores followed by a single load, causing a memory-size-mismatch
+ stall. Count this as an arbitrarily high cost of 20. */
+ if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ cost += 20;
+
+ /* In the case of FP/MMX moves, the registers actually overlap, and we
+ have to switch modes in order to treat them differently. */
+ if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
+ || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
+ cost += 20;
+
+ return cost;
+ }
+
+ /* Moves between SSE/MMX and integer unit are expensive. */
+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
+ || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
+
+ /* ??? By keeping the returned value relatively high, we limit the number
+ of moves between integer and MMX/SSE registers for all targets.
+ Additionally, a high value prevents a problem with x86_modes_tieable_p(),
+ where integer modes in MMX/SSE registers are not tieable
+ because of missing QImode and HImode moves to, from or between
+ MMX/SSE registers. */
+ return MAX (8, ix86_cost->mmxsse_to_integer);
+
+ if (MAYBE_FLOAT_CLASS_P (class1))
+ return ix86_cost->fp_move;
+ if (MAYBE_SSE_CLASS_P (class1))
+ return ix86_cost->sse_move;
+ if (MAYBE_MMX_CLASS_P (class1))
+ return ix86_cost->mmx_move;
+ return 2;
+}
+
+/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+bool
+ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
+{
+ /* Flags, and only flags, can hold CCmode values. */
+ if (CC_REGNO_P (regno))
+ return GET_MODE_CLASS (mode) == MODE_CC;
+ if (GET_MODE_CLASS (mode) == MODE_CC
+ || GET_MODE_CLASS (mode) == MODE_RANDOM
+ || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
+ return 0;
+ if (FP_REGNO_P (regno))
+ return VALID_FP_MODE_P (mode);
+ if (SSE_REGNO_P (regno))
+ {
+ /* We implement the move patterns for all vector modes into and
+ out of SSE registers, even when no operation instructions
+ are available. OImode move is available only when AVX is
+ enabled. */
+ return ((TARGET_AVX && mode == OImode)
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_SSE_REG_MODE (mode)
+ || VALID_SSE2_REG_MODE (mode)
+ || VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+ if (MMX_REGNO_P (regno))
+ {
+ /* We implement the move patterns for 3DNOW modes even in MMX mode,
+ so if the register is available at all, then we can move data of
+ the given mode into or out of it. */
+ return (VALID_MMX_REG_MODE (mode)
+ || VALID_MMX_REG_MODE_3DNOW (mode));
+ }
+
+ if (mode == QImode)
+ {
+ /* Take care with QImode values - they can be in non-QI regs,
+ but then they do cause partial register stalls. */
+ if (regno <= BX_REG || TARGET_64BIT)
+ return 1;
+ if (!TARGET_PARTIAL_REG_STALL)
+ return 1;
+ return reload_in_progress || reload_completed;
+ }
+ /* We handle both integer and floats in the general purpose registers. */
+ else if (VALID_INT_MODE_P (mode))
+ return 1;
+ else if (VALID_FP_MODE_P (mode))
+ return 1;
+ else if (VALID_DFP_MODE_P (mode))
+ return 1;
+ /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
+ on to use that value in smaller contexts, this can easily force a
+ pseudo to be allocated to GENERAL_REGS. Since this is no worse than
+ supporting DImode, allow it. */
+ else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
+ return 1;
+
+ return 0;
+}
+
+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
+ tieable integer mode. */
+
+static bool
+ix86_tieable_integer_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case HImode:
+ case SImode:
+ return true;
+
+ case QImode:
+ return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
+
+ case DImode:
+ return TARGET_64BIT;
+
+ default:
+ return false;
+ }
+}
+
+/* Return true if MODE1 is accessible in a register that can hold MODE2
+ without copying. That is, all register classes that can hold MODE2
+ can also hold MODE1. */
+
+bool
+ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ if (mode1 == mode2)
+ return true;
+
+ if (ix86_tieable_integer_mode_p (mode1)
+ && ix86_tieable_integer_mode_p (mode2))
+ return true;
+
+ /* MODE2 being XFmode implies fp stack or general regs, which means we
+ can tie any smaller floating point modes to it. Note that we do not
+ tie this with TFmode. */
+ if (mode2 == XFmode)
+ return mode1 == SFmode || mode1 == DFmode;
+
+ /* MODE2 being DFmode implies fp stack, general or sse regs, which means
+ that we can tie it with SFmode. */
+ if (mode2 == DFmode)
+ return mode1 == SFmode;
+
+ /* If MODE2 is only appropriate for an SSE register, then tie with
+ any other mode acceptable to SSE registers. */
+ if (GET_MODE_SIZE (mode2) == 16
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
+ return (GET_MODE_SIZE (mode1) == 16
+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
+
+ /* If MODE2 is appropriate for an MMX register, then tie
+ with any other mode acceptable to MMX registers. */
+ if (GET_MODE_SIZE (mode2) == 8
+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
+ return (GET_MODE_SIZE (mode1) == 8
+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
+
+ return false;
+}
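+
+/* For example, SImode and HImode always tie; DImode ties with them only
+ in 64-bit mode; and two 16-byte SSE modes such as V4SFmode and
+ V2DImode tie with each other. */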
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
+{
+ enum rtx_code outer_code = (enum rtx_code) outer_code_i;
+ enum machine_mode mode = GET_MODE (x);
+ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
+ *total = 3;
+ else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
+ *total = 2;
+ else if (flag_pic && SYMBOLIC_CONST (x)
+ && (!TARGET_64BIT
+ || (GET_CODE (x) != LABEL_REF
+ && (GET_CODE (x) != SYMBOL_REF
+ || !SYMBOL_REF_LOCAL_P (x)))))
+ *total = 1;
+ else
+ *total = 0;
+ return true;
+
+ case CONST_DOUBLE:
+ if (mode == VOIDmode)
+ *total = 0;
+ else
+ switch (standard_80387_constant_p (x))
+ {
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default: /* Other constants */
+ *total = 2;
+ break;
+ case 0:
+ case -1:
+ /* Start with (MEM (SYMBOL_REF)), since that's where
+ it'll probably end up. Add a penalty for size. */
+ *total = (COSTS_N_INSNS (1)
+ + (flag_pic != 0 && !TARGET_64BIT)
+ + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
+ break;
+ }
+ return true;
+
+ case ZERO_EXTEND:
+ /* Zero extension is often completely free on x86_64, so make
+ it as cheap as possible. */
+ if (TARGET_64BIT && mode == DImode
+ && GET_MODE (XEXP (x, 0)) == SImode)
+ *total = 1;
+ else if (TARGET_ZERO_EXTEND_WITH_AND)
+ *total = cost->add;
+ else
+ *total = cost->movzx;
+ return false;
+
+ case SIGN_EXTEND:
+ *total = cost->movsx;
+ return false;
+
+ case ASHIFT:
+ if (CONST_INT_P (XEXP (x, 1))
+ && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+ {
+ HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ if (value == 1)
+ {
+ *total = cost->add;
+ return false;
+ }
+ if ((value == 2 || value == 3)
+ && cost->lea <= cost->shift_const)
+ {
+ *total = cost->lea;
+ return false;
+ }
+ }
+ /* FALLTHRU */
+
+ case ROTATE:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+ {
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ if (INTVAL (XEXP (x, 1)) > 32)
+ *total = cost->shift_const + COSTS_N_INSNS (2);
+ else
+ *total = cost->shift_const * 2;
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == AND)
+ *total = cost->shift_var * 2;
+ else
+ *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
+ }
+ }
+ else
+ {
+ if (CONST_INT_P (XEXP (x, 1)))
+ *total = cost->shift_const;
+ else
+ *total = cost->shift_var;
+ }
+ return false;
+
+ case MULT:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ /* ??? SSE scalar cost should be used here. */
+ *total = cost->fmul;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = cost->fmul;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fmul;
+ return false;
+ }
+ else
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ int nbits;
+ if (CONST_INT_P (XEXP (x, 1)))
+ {
+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ for (nbits = 0; value != 0; value &= value - 1)
+ nbits++;
+ }
+ else
+ /* This is arbitrary. */
+ nbits = 7;
+
+ /* Compute costs correctly for widening multiplication. */
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
+ && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
+ == GET_MODE_SIZE (mode))
+ {
+ int is_mulwiden = 0;
+ enum machine_mode inner_mode = GET_MODE (op0);
+
+ if (GET_CODE (op0) == GET_CODE (op1))
+ is_mulwiden = 1, op1 = XEXP (op1, 0);
+ else if (CONST_INT_P (op1))
+ {
+ if (GET_CODE (op0) == SIGN_EXTEND)
+ is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
+ == INTVAL (op1);
+ else
+ is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
+ }
+
+ if (is_mulwiden)
+ op0 = XEXP (op0, 0), mode = GET_MODE (op0);
+ }
+
+ *total = (cost->mult_init[MODE_INDEX (mode)]
+ + nbits * cost->mult_bit
+ + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
+
+ return true;
+ }
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = cost->fdiv;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = cost->fdiv;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fdiv;
+ else
+ *total = cost->divide[MODE_INDEX (mode)];
+ return false;
+
+ case PLUS:
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
+ {
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MULT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ case MINUS:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ /* ??? SSE cost should be used here. */
+ *total = cost->fadd;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = cost->fadd;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fadd;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (!TARGET_64BIT && mode == DImode)
+ {
+ *total = (cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code, speed)
+ << (GET_MODE (XEXP (x, 0)) != DImode))
+ + (rtx_cost (XEXP (x, 1), outer_code, speed)
+ << (GET_MODE (XEXP (x, 1)) != DImode)));
+ return true;
+ }
+ /* FALLTHRU */
+
+ case NEG:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ {
+ /* ??? SSE cost should be used here. */
+ *total = cost->fchs;
+ return false;
+ }
+ else if (X87_FLOAT_MODE_P (mode))
+ {
+ *total = cost->fchs;
+ return false;
+ }
+ else if (FLOAT_MODE_P (mode))
+ {
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fchs;
+ return false;
+ }
+ /* FALLTHRU */
+
+ case NOT:
+ if (!TARGET_64BIT && mode == DImode)
+ *total = cost->add * 2;
+ else
+ *total = cost->add;
+ return false;
+
+ case COMPARE:
+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && XEXP (XEXP (x, 0), 1) == const1_rtx
+ && CONST_INT_P (XEXP (XEXP (x, 0), 2))
+ && XEXP (x, 1) == const0_rtx)
+ {
+ /* This kind of construct is implemented using test[bwl].
+ Treat it as if we had an AND. */
+ *total = (cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ + rtx_cost (const1_rtx, outer_code, speed));
+ return true;
+ }
+ return false;
+
+ case FLOAT_EXTEND:
+ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
+ *total = 0;
+ return false;
+
+ case ABS:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = cost->fabs;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = cost->fabs;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fabs;
+ return false;
+
+ case SQRT:
+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
+ /* ??? SSE cost should be used here. */
+ *total = cost->fsqrt;
+ else if (X87_FLOAT_MODE_P (mode))
+ *total = cost->fsqrt;
+ else if (FLOAT_MODE_P (mode))
+ /* ??? SSE vector cost should be used here. */
+ *total = cost->fsqrt;
+ return false;
+
+ case UNSPEC:
+ if (XINT (x, 1) == UNSPEC_TP)
+ *total = 0;
+ return false;
+
+ default:
+ return false;
+ }
+}
+
+#if TARGET_MACHO
+
+static int current_machopic_label_num;
+
+/* Given a symbol name and its associated stub, write out the
+ definition of the stub. */
+
+void
+machopic_output_stub (FILE *file, const char *symb, const char *stub)
+{
+ unsigned int length;
+ char *binder_name, *symbol_name, lazy_ptr_name[32];
+ int label = ++current_machopic_label_num;
+
+ /* For 64-bit we shouldn't get here. */
+ gcc_assert (!TARGET_64BIT);
+
+ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
+ symb = (*targetm.strip_name_encoding) (symb);
+
+ length = strlen (stub);
+ binder_name = XALLOCAVEC (char, length + 32);
+ GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
+
+ length = strlen (symb);
+ symbol_name = XALLOCAVEC (char, length + 32);
+ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
+
+ sprintf (lazy_ptr_name, "L%d$lz", label);
+
+ if (MACHOPIC_PURE)
+ switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
+ else
+ switch_to_section (darwin_sections[machopic_symbol_stub_section]);
+
+ fprintf (file, "%s:\n", stub);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+
+ if (MACHOPIC_PURE)
+ {
+ fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
+ fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
+ fprintf (file, "\tjmp\t*%%edx\n");
+ }
+ else
+ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
+
+ fprintf (file, "%s:\n", binder_name);
+
+ if (MACHOPIC_PURE)
+ {
+ fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
+ fprintf (file, "\tpushl\t%%eax\n");
+ }
+ else
+ fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
+
+ fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
+
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ fprintf (file, "%s:\n", lazy_ptr_name);
+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
+ fprintf (file, "\t.long %s\n", binder_name);
+}
+
+void
+darwin_x86_file_end (void)
+{
+ darwin_file_end ();
+ ix86_file_end ();
+}
+#endif /* TARGET_MACHO */
+
+/* Order the registers for the register allocator. */
+
+void
+x86_order_regs_for_local_alloc (void)
+{
+ int pos = 0;
+ int i;
+
+ /* First allocate the local general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* Global general purpose registers. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (GENERAL_REGNO_P (i) && !call_used_regs[i])
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers come first in case we are doing FP math
+ using them. */
+ if (!TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* SSE registers. */
+ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* x87 registers. */
+ if (TARGET_SSE_MATH)
+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
+ reg_alloc_order [pos++] = i;
+
+ /* Initialize the rest of the array, as we do not allocate some
+ registers at all. */
+ while (pos < FIRST_PSEUDO_REGISTER)
+ reg_alloc_order [pos++] = 0;
+}
+
+/* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_abi_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE
+ && TREE_CODE (*node) != FIELD_DECL
+ && TREE_CODE (*node) != TYPE_DECL)
+ {
+ warning (OPT_Wattributes, "%qs attribute only applies to functions",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+ if (!TARGET_64BIT)
+ {
+ warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
+ /* The ms_abi and sysv_abi attributes are mutually exclusive. */
+ if (is_attribute_p ("ms_abi", name))
+ {
+ if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("ms_abi and sysv_abi attributes are not compatible");
+ }
+
+ return NULL_TREE;
+ }
+ else if (is_attribute_p ("sysv_abi", name))
+ {
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("ms_abi and sysv_abi attributes are not compatible");
+ }
+
+ return NULL_TREE;
+ }
+
+ return NULL_TREE;
+}
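+
+/* For instance, on a 64-bit target a declaration such as
+ int f (int) __attribute__ ((ms_abi));
+ passes the checks above, while combining ms_abi and sysv_abi on the
+ same function is rejected with an error. */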
+
+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_struct_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ tree *type = NULL;
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ type = &TREE_TYPE (*node);
+ }
+ else
+ type = node;
+
+ if (!(type && (TREE_CODE (*type) == RECORD_TYPE
+ || TREE_CODE (*type) == UNION_TYPE)))
+ {
+ warning (OPT_Wattributes, "%qs attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ else if ((is_attribute_p ("ms_struct", name)
+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
+ || ((is_attribute_p ("gcc_struct", name)
+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
+ {
+ warning (OPT_Wattributes, "%qs incompatible attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+static bool
+ix86_ms_bitfield_layout_p (const_tree record_type)
+{
+ return ((TARGET_MS_BITFIELD_LAYOUT
+ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
+}
+
+/* Returns an expression indicating where the this parameter is
+ located on entry to the FUNCTION. */
+
+static rtx
+x86_this_parameter (tree function)
+{
+ tree type = TREE_TYPE (function);
+ bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
+ int nregs;
+
+ if (TARGET_64BIT)
+ {
+ const int *parm_regs;
+
+ if (ix86_function_type_abi (type) == MS_ABI)
+ parm_regs = x86_64_ms_abi_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+ return gen_rtx_REG (DImode, parm_regs[aggr]);
+ }
+
+ nregs = ix86_function_regparm (type, function);
+
+ if (nregs > 0 && !stdarg_p (type))
+ {
+ int regno;
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ regno = aggr ? DX_REG : CX_REG;
+ else
+ {
+ regno = AX_REG;
+ if (aggr)
+ {
+ regno = DX_REG;
+ if (nregs == 1)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
+ }
+ return gen_rtx_REG (SImode, regno);
+ }
+
+ return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
+}
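+
+/* For example, for a fastcall method THIS arrives in %ecx (%edx when the
+ return value is an aggregate), while without register parameters it is
+ found at 4(%esp), or at 8(%esp) when a hidden aggregate-return pointer
+ comes first. */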
+
+/* Determine whether x86_output_mi_thunk can succeed. */
+
+static bool
+x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT vcall_offset, const_tree function)
+{
+ /* 64-bit can handle anything. */
+ if (TARGET_64BIT)
+ return true;
+
+ /* For 32-bit, everything's fine if we have one free register. */
+ if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
+ return true;
+
+ /* Need a free register for vcall_offset. */
+ if (vcall_offset)
+ return false;
+
+ /* Need a free register for GOT references. */
+ if (flag_pic && !(*targetm.binds_local_p) (function))
+ return false;
+
+ /* Otherwise ok. */
+ return true;
+}
+
+/* Output the assembler code for a thunk function. THUNK_DECL is the
+ declaration for the thunk function itself, FUNCTION is the decl for
+ the target function. DELTA is an immediate constant offset to be
+ added to THIS. If VCALL_OFFSET is nonzero, the word at
+ *(*this + vcall_offset) should be added to THIS. */
+
+static void
+x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
+ tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset, tree function)
+{
+ rtx xops[3];
+ rtx this_param = x86_this_parameter (function);
+ rtx this_reg, tmp;
+
+ /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
+ pull it in now and let DELTA benefit. */
+ if (REG_P (this_param))
+ this_reg = this_param;
+ else if (vcall_offset)
+ {
+ /* Put the this parameter into %eax. */
+ xops[0] = this_param;
+ xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ this_reg = NULL_RTX;
+
+ /* Adjust the this parameter by a fixed constant. */
+ if (delta)
+ {
+ xops[0] = GEN_INT (delta);
+ xops[1] = this_reg ? this_reg : this_param;
+ if (TARGET_64BIT)
+ {
+ if (!x86_64_general_operand (xops[0], DImode))
+ {
+ tmp = gen_rtx_REG (DImode, R10_REG);
+ xops[1] = tmp;
+ output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
+ xops[0] = tmp;
+ xops[1] = this_param;
+ }
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
+ }
+ else
+ output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* Adjust the this parameter by a value stored in the vtable. */
+ if (vcall_offset)
+ {
+ if (TARGET_64BIT)
+ tmp = gen_rtx_REG (DImode, R10_REG);
+ else
+ {
+ int tmp_regno = CX_REG;
+ if (lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ tmp_regno = AX_REG;
+ tmp = gen_rtx_REG (SImode, tmp_regno);
+ }
+
+ xops[0] = gen_rtx_MEM (Pmode, this_reg);
+ xops[1] = tmp;
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+
+ /* Adjust the this parameter. */
+ xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
+ if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
+ {
+ rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
+ xops[0] = GEN_INT (vcall_offset);
+ xops[1] = tmp2;
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
+ }
+ xops[1] = this_reg;
+ output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
+ }
+
+ /* If necessary, drop THIS back to its stack slot. */
+ if (this_reg && this_reg != this_param)
+ {
+ xops[0] = this_reg;
+ xops[1] = this_param;
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ }
+
+ xops[0] = XEXP (DECL_RTL (function), 0);
+ if (TARGET_64BIT)
+ {
+ if (!flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ /* All thunks should be in the same object as their target,
+ and thus binds_local_p should be true. */
+ else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+ gcc_unreachable ();
+ else
+ {
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
+ tmp = gen_rtx_CONST (Pmode, tmp);
+ tmp = gen_rtx_MEM (QImode, tmp);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%A0", xops);
+ }
+ }
+ else
+ {
+ if (!flag_pic || (*targetm.binds_local_p) (function))
+ output_asm_insn ("jmp\t%P0", xops);
+ else
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ rtx sym_ref = XEXP (DECL_RTL (function), 0);
+ tmp = (gen_rtx_SYMBOL_REF
+ (Pmode,
+ machopic_indirection_name (sym_ref, /*stub_p=*/true)));
+ tmp = gen_rtx_MEM (QImode, tmp);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%0", xops);
+ }
+ else
+#endif /* TARGET_MACHO */
+ {
+ tmp = gen_rtx_REG (SImode, CX_REG);
+ output_set_got (tmp, NULL_RTX);
+
+ xops[1] = tmp;
+ output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
+ output_asm_insn ("jmp\t{*}%1", xops);
+ }
+ }
+}
+
+static void
+x86_file_start (void)
+{
+ default_file_start ();
+#if TARGET_MACHO
+ darwin_file_start ();
+#endif
+ if (X86_FILE_START_VERSION_DIRECTIVE)
+ fputs ("\t.version\t\"01.01\"\n", asm_out_file);
+ if (X86_FILE_START_FLTUSED)
+ fputs ("\t.global\t__fltused\n", asm_out_file);
+ if (ix86_asm_dialect == ASM_INTEL)
+ fputs ("\t.intel_syntax noprefix\n", asm_out_file);
+}
+
+int
+x86_field_alignment (tree field, int computed)
+{
+ enum machine_mode mode;
+ tree type = TREE_TYPE (field);
+
+ if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
+ return computed;
+ mode = TYPE_MODE (strip_array_types (type));
+ if (mode == DFmode || mode == DCmode
+ || GET_MODE_CLASS (mode) == MODE_INT
+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
+ return MIN (32, computed);
+ return computed;
+}
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+void
+x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ if (TARGET_64BIT)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
+#endif
+
+ if (DEFAULT_ABI == SYSV_ABI && flag_pic)
+ fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
+ else
+ fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+ }
+ else if (flag_pic)
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
+ LPREFIX, labelno, PROFILE_COUNT_REGISTER);
+#endif
+ fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
+ }
+ else
+ {
+#ifndef NO_PROFILE_COUNTERS
+ fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
+ PROFILE_COUNT_REGISTER);
+#endif
+ fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
+ }
+}
+
+/* We don't have exact information about the insn sizes, but we may assume
+ quite safely that we are informed about all 1-byte insns and memory
+ address sizes. This is enough to eliminate unnecessary padding in
+ 99% of cases. */
+
+static int
+min_insn_size (rtx insn)
+{
+ int l = 0;
+
+ if (!INSN_P (insn) || !active_insn_p (insn))
+ return 0;
+
+ /* Discard alignments we've emitted, and jump tables. */
+ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
+ return 0;
+ if (JUMP_P (insn)
+ && (GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+ return 0;
+
+ /* Important case - calls are always 5 bytes.
+ It is common to have many calls in a row. */
+ if (CALL_P (insn)
+ && symbolic_reference_mentioned_p (PATTERN (insn))
+ && !SIBLING_CALL_P (insn))
+ return 5;
+ if (get_attr_length (insn) <= 1)
+ return 1;
+
+ /* For normal instructions we rely on the sizes of addresses and the
+ presence of a symbol to require 4 bytes of encoding. This is not
+ the case for jumps, where references are PC relative. */
+ if (!JUMP_P (insn))
+ {
+ l = get_attr_length_address (insn);
+ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+ l = 4;
+ }
+ if (l)
+ return 1+l;
+ else
+ return 2;
+}
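+
+/* For example, a non-sibling call through a symbol counts as 5 bytes, an
+ insn already known to be 1 byte counts as 1, and a reg-reg ALU insn
+ with no address or symbolic operand falls through to the conservative
+ estimate of 2. */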
+
+/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in
+ a 16-byte window. */
+
+static void
+ix86_avoid_jump_misspredicts (void)
+{
+ rtx insn, start = get_insns ();
+ int nbytes = 0, njumps = 0;
+ int isjump = 0;
+
+ /* Look for all minimal intervals of instructions containing 4 jumps.
+ The intervals are bounded by START and INSN. NBYTES is the total
+ size of instructions in the interval including INSN and not including
+ START. When NBYTES is smaller than 16, it is possible
+ that the end of START and INSN ends up in the same 16-byte page.
+
+ The smallest offset in the page at which INSN can start is the case
+ where START ends at offset 0. The offset of INSN is then
+ NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
+ maxskip 17 - NBYTES + sizeof (INSN).
+ */
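+ /* For example, if a fourth jump closes an interval with NBYTES == 10
+ and min_insn_size (insn) == 2, the p2align emitted below pads by
+ 15 - 10 + 2 = 7 bytes, pushing the jump out of the 16-byte window. */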
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+
+ nbytes += min_insn_size (insn);
+ if (dump_file)
+ fprintf(dump_file, "Insn %i estimated to %i bytes\n",
+ INSN_UID (insn), min_insn_size (insn));
+ if ((JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ || CALL_P (insn))
+ njumps++;
+ else
+ continue;
+
+ while (njumps > 3)
+ {
+ start = NEXT_INSN (start);
+ if ((JUMP_P (start)
+ && GET_CODE (PATTERN (start)) != ADDR_VEC
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+ || CALL_P (start))
+ njumps--, isjump = 1;
+ else
+ isjump = 0;
+ nbytes -= min_insn_size (start);
+ }
+ gcc_assert (njumps >= 0);
+ if (dump_file)
+ fprintf (dump_file, "Interval %i to %i has %i bytes\n",
+ INSN_UID (start), INSN_UID (insn), nbytes);
+
+ if (njumps == 3 && isjump && nbytes < 16)
+ {
+ int padsize = 15 - nbytes + min_insn_size (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Padding insn %i by %i bytes!\n",
+ INSN_UID (insn), padsize);
+ emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+ }
+ }
+}
+
+/* The AMD Athlon works faster when RET is not the destination of a
+ conditional jump and is not directly preceded by another jump
+ instruction. We avoid the penalty by inserting a NOP just before
+ the RET instruction in such cases. */
+static void
+ix86_pad_returns (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ basic_block bb = e->src;
+ rtx ret = BB_END (bb);
+ rtx prev;
+ bool replace = false;
+
+ if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
+ || optimize_bb_for_size_p (bb))
+ continue;
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+ if (active_insn_p (prev) || LABEL_P (prev))
+ break;
+ if (prev && LABEL_P (prev))
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (EDGE_FREQUENCY (e) && e->src->index >= 0
+ && !(e->flags & EDGE_FALLTHRU))
+ replace = true;
+ }
+ if (!replace)
+ {
+ prev = prev_active_insn (ret);
+ if (prev
+ && ((JUMP_P (prev) && any_condjump_p (prev))
+ || CALL_P (prev)))
+ replace = true;
+ /* Empty functions get a branch mispredict even when the jump destination
+ is not visible to us. */
+ if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ replace = true;
+ }
+ if (replace)
+ {
+ emit_insn_before (gen_return_internal_long (), ret);
+ delete_insn (ret);
+ }
+ }
+}
+
+/* Implement machine specific optimizations. We implement padding of
+ returns for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte
+ window. */
+static void
+ix86_reorg (void)
+{
+ if (TARGET_PAD_RETURNS && optimize
+ && optimize_function_for_speed_p (cfun))
+ ix86_pad_returns ();
+ if (TARGET_FOUR_JUMP_LIMIT && optimize
+ && optimize_function_for_speed_p (cfun))
+ ix86_avoid_jump_misspredicts ();
+}
+
+/* Return nonzero when a QImode register that must be represented via a
+ REX prefix is used. */
+bool
+x86_extended_QIreg_mentioned_p (rtx insn)
+{
+ int i;
+ extract_insn_cached (insn);
+ for (i = 0; i < recog_data.n_operands; i++)
+ if (REG_P (recog_data.operand[i])
+ && REGNO (recog_data.operand[i]) > BX_REG)
+ return true;
+ return false;
+}
+
+/* Return nonzero when P points to a register encoded via a REX prefix.
+ Called via for_each_rtx. */
+static int
+extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
+{
+ unsigned int regno;
+ if (!REG_P (*p))
+ return 0;
+ regno = REGNO (*p);
+ return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
+}
+
+/* Return true when INSN mentions a register that must be encoded using
+ a REX prefix. */
+bool
+x86_extended_reg_mentioned_p (rtx insn)
+{
+ return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
+ extended_reg_mentioned_1, NULL);
+}
+
+/* Generate an unsigned DImode/SImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
+
+void
+x86_emit_floatuns (rtx operands[2])
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+ enum machine_mode mode, inmode;
+
+ inmode = GET_MODE (operands[1]);
+ gcc_assert (inmode == SImode || inmode == DImode);
+
+ out = operands[0];
+ in = force_reg (inmode, operands[1]);
+ mode = GET_MODE (out);
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
+
+ expand_float (out, in, 0);
+
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
+ i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
+ 1, OPTAB_DIRECT);
+ i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
+ 1, OPTAB_DIRECT);
+ i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
+
+ expand_float (f0, i0, 0);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
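+
+/* The negative path above computes (IN >> 1) | (IN & 1), so the discarded
+ low bit still acts as a sticky rounding bit; converting that half as a
+ signed value and then doubling it with F0 + F0 produces the correctly
+ rounded unsigned result. */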
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ with all elements equal to VAR. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx val)
+{
+ enum machine_mode hmode, smode, wsmode, wvmode;
+ rtx x;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ case V4SFmode:
+ case V4SImode:
+ val = force_reg (GET_MODE_INNER (mode), val);
+ x = gen_rtx_VEC_DUPLICATE (mode, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4HImode:
+ if (!mmx_ok)
+ return false;
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ {
+ val = gen_lowpart (SImode, val);
+ x = gen_rtx_TRUNCATE (HImode, val);
+ x = gen_rtx_VEC_DUPLICATE (mode, x);
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+ }
+ else
+ {
+ smode = HImode;
+ wsmode = SImode;
+ wvmode = V2SImode;
+ goto widen;
+ }
+
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ smode = QImode;
+ wsmode = HImode;
+ wvmode = V4HImode;
+ goto widen;
+ case V8HImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend HImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V8HImode vector. */
+ tmp1 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
+ /* Duplicate the low short through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
+ /* Cast the V8HImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode vector back to V8HImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
+ return true;
+ }
+ smode = HImode;
+ wsmode = SImode;
+ wvmode = V4SImode;
+ goto widen;
+ case V16QImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend QImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V16QImode vector. */
+ tmp1 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
+ /* Duplicate the low byte through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ /* Cast the V16QImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode vector back to V16QImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
+ return true;
+ }
+ smode = QImode;
+ wsmode = HImode;
+ wvmode = V8HImode;
+ goto widen;
+ widen:
+ /* Replicate the value once into the next wider mode and recurse. */
+ val = convert_modes (wsmode, smode, val, true);
+ x = expand_simple_binop (wsmode, ASHIFT, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+ x = gen_reg_rtx (wvmode);
+ if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
+ gcc_unreachable ();
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ case V4DFmode:
+ hmode = V2DFmode;
+ goto half;
+ case V4DImode:
+ hmode = V2DImode;
+ goto half;
+ case V8SFmode:
+ hmode = V4SFmode;
+ goto half;
+ case V8SImode:
+ hmode = V4SImode;
+ goto half;
+ case V16HImode:
+ hmode = V8HImode;
+ goto half;
+ case V32QImode:
+ hmode = V16QImode;
+ goto half;
+half:
+ {
+ rtx tmp = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
+ }
+ return true;
+
+ default:
+ return false;
+ }
+}
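+
+/* For example, broadcasting a QImode value into V8QImode on plain MMX
+ (no SSE and no 3DNow!A) widens it to HImode as (val << 8) | val,
+ recurses on V4HImode, widens again to V2SImode, and finishes in the
+ VEC_DUPLICATE case at the top of the switch. */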
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ whose ONE_VAR element is VAR, and other elements are zero. Return true
+ if successful. */
+
+static bool
+ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx var, int one_var)
+{
+ enum machine_mode vsimode;
+ rtx new_target;
+ rtx x, tmp;
+ bool use_vector_set = false;
+
+ switch (mode)
+ {
+ case V2DImode:
+ /* For SSE4.1, we normally use vector set. But if the second
+ element is zero and inter-unit moves are OK, we use movq
+ instead. */
+ use_vector_set = (TARGET_64BIT
+ && TARGET_SSE4_1
+ && !(TARGET_INTER_UNIT_MOVES
+ && one_var == 0));
+ break;
+ case V16QImode:
+ case V4SImode:
+ case V4SFmode:
+ use_vector_set = TARGET_SSE4_1;
+ break;
+ case V8HImode:
+ use_vector_set = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
+ break;
+ case V32QImode:
+ case V16HImode:
+ case V8SImode:
+ case V8SFmode:
+ case V4DFmode:
+ use_vector_set = TARGET_AVX;
+ break;
+ case V4DImode:
+ /* Use ix86_expand_vector_set in 64bit mode only. */
+ use_vector_set = TARGET_AVX && TARGET_64BIT;
+ break;
+ default:
+ break;
+ }
+
+ if (use_vector_set)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
+ var = force_reg (GET_MODE_INNER (mode), var);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+ }
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok)
+ return false;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ if (one_var != 0)
+ return false;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
+ emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ return true;
+
+ case V4SFmode:
+ case V4SImode:
+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ new_target = gen_reg_rtx (mode);
+ else
+ new_target = target;
+ var = force_reg (GET_MODE_INNER (mode), var);
+ x = gen_rtx_VEC_DUPLICATE (mode, var);
+ x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
+ if (one_var != 0)
+ {
+ /* We need to shuffle the value to the correct position, so
+ create a new pseudo to store the intermediate result. */
+
+ /* With SSE2, we can use the integer shuffle insns. */
+ if (mode != V4SFmode && TARGET_SSE2)
+ {
+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0 : 1),
+ GEN_INT (one_var == 3 ? 0 : 1)));
+ if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+ }
+
+ /* Otherwise convert the intermediate result to V4SFmode and
+ use the SSE1 shuffle instructions. */
+ if (mode != V4SFmode)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
+ }
+ else
+ tmp = new_target;
+
+ emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0+4 : 1+4),
+ GEN_INT (one_var == 3 ? 0+4 : 1+4)));
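+ /* In these shufps selectors the first two indices pick elements from
+ the first source and the last two from the second; the "+4" notation
+ marks the latter. E.g. for one_var == 2 the selectors are
+ (1, 1, 0+4, 1+4), which places the value (sitting in element 0 of
+ TMP) into element 2 of the result. */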
+
+ if (mode != V4SFmode)
+ emit_move_insn (target, gen_lowpart (V4SImode, tmp));
+ else if (tmp != target)
+ emit_move_insn (target, tmp);
+ }
+ else if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+
+ case V8HImode:
+ case V16QImode:
+ vsimode = V4SImode;
+ goto widen;
+ case V4HImode:
+ case V8QImode:
+ if (!mmx_ok)
+ return false;
+ vsimode = V2SImode;
+ goto widen;
+ widen:
+ if (one_var != 0)
+ return false;
+
+ /* Zero extend the variable element to SImode and recurse. */
+ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
+
+ x = gen_reg_rtx (vsimode);
+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
+ var, one_var))
+ gcc_unreachable ();
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
+ consisting of the values in VALS. It is known that all elements
+ except ONE_VAR are constants. Return true if successful. */
+
+static bool
+ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals, int one_var)
+{
+ rtx var = XVECEXP (vals, 0, one_var);
+ enum machine_mode wmode;
+ rtx const_vec, x;
+
+ const_vec = copy_rtx (vals);
+ XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
+
+ switch (mode)
+ {
+ case V2DFmode:
+ case V2DImode:
+ case V2SFmode:
+ case V2SImode:
+ /* For the two element vectors, it's just as easy to use
+ the general case. */
+ return false;
+
+ case V4DImode:
+ /* Use ix86_expand_vector_set in 64bit mode only. */
+ if (!TARGET_64BIT)
+ return false;
+ case V4DFmode:
+ case V8SFmode:
+ case V8SImode:
+ case V16HImode:
+ case V32QImode:
+ case V4SFmode:
+ case V4SImode:
+ case V8HImode:
+ case V4HImode:
+ break;
+
+ case V16QImode:
+ if (TARGET_SSE4_1)
+ break;
+ wmode = V8HImode;
+ goto widen;
+ case V8QImode:
+ wmode = V4HImode;
+ goto widen;
+ widen:
+ /* There's no way to set one QImode entry easily. Combine
+ the variable value with its adjacent constant value, and
+ promote to an HImode set. */
+ x = XVECEXP (vals, 0, one_var ^ 1);
+ if (one_var & 1)
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ x = GEN_INT (INTVAL (x) & 0xff);
+ }
+ else
+ {
+ var = convert_modes (HImode, QImode, var, true);
+ x = gen_int_mode (INTVAL (x) << 8, HImode);
+ }
+ if (x != const0_rtx)
+ var = expand_simple_binop (HImode, IOR, var, x, var,
+ 1, OPTAB_LIB_WIDEN);
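+ /* E.g. for one_var == 3 the adjacent constant is vals[2]; the code
+ above builds the HImode value (var << 8) | (vals[2] & 0xff), which
+ the HImode vector_set below stores at element 3 >> 1 == 1. */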
+
+ x = gen_reg_rtx (wmode);
+ emit_move_insn (x, gen_lowpart (wmode, const_vec));
+ ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
+
+ emit_move_insn (target, gen_lowpart (mode, x));
+ return true;
+
+ default:
+ return false;
+ }
+
+ emit_move_insn (target, const_vec);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+}
+
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ concatenate to handle the most general case: all values variable,
+ and none identical. */
+
+static void
+ix86_expand_vector_init_concat (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode cmode, hmode = VOIDmode;
+ rtx first[8], second[4];
+ rtvec v;
+ int i, j;
+
+ switch (n)
+ {
+ case 2:
+ switch (mode)
+ {
+ case V8SImode:
+ cmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V4SFmode;
+ break;
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ case V2DImode:
+ cmode = DImode;
+ break;
+ case V2SImode:
+ cmode = SImode;
+ break;
+ case V2DFmode:
+ cmode = DFmode;
+ break;
+ case V2SFmode:
+ cmode = SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!register_operand (ops[1], cmode))
+ ops[1] = force_reg (cmode, ops[1]);
+ if (!register_operand (ops[0], cmode))
+ ops[0] = force_reg (cmode, ops[0]);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, ops[0],
+ ops[1])));
+ break;
+
+ case 4:
+ switch (mode)
+ {
+ case V4DImode:
+ cmode = V2DImode;
+ break;
+ case V4DFmode:
+ cmode = V2DFmode;
+ break;
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
+ case 8:
+ switch (mode)
+ {
+ case V8SImode:
+ cmode = V2SImode;
+ hmode = V4SImode;
+ break;
+ case V8SFmode:
+ cmode = V2SFmode;
+ hmode = V4SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
+half:
+ /* FIXME: We process inputs backward to help RA. PR 36222. */
+ i = n - 1;
+ j = (n >> 1) - 1;
+ for (; i > 0; i -= 2, j--)
+ {
+ first[j] = gen_reg_rtx (cmode);
+ v = gen_rtvec (2, ops[i - 1], ops[i]);
+ ix86_expand_vector_init (false, first[j],
+ gen_rtx_PARALLEL (cmode, v));
+ }
+
+ n >>= 1;
+ if (n > 2)
+ {
+ gcc_assert (hmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, second, n);
+ }
+ else
+ ix86_expand_vector_init_concat (mode, target, first, n);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
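+/* For instance, building a V8SImode vector from 8 scalar operands works
+ bottom-up: the loop above pairs them into 4 V2SImode registers, the
+ recursive calls concatenate those into 2 V4SImode halves, and a final
+ VEC_CONCAT produces the V8SImode result. A sketch of the recursion
+ (not additional code):
+
+ {a,b} {c,d} {e,f} {g,h} -> {a..d} {e..h} -> {a..h} */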
+
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ interleave to handle the most general case: all values variable,
+ and none identical. */
+
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode first_imode, second_imode, third_imode, inner_mode;
+ int i, j;
+ rtx op0, op1;
+ rtx (*gen_load_even) (rtx, rtx, rtx);
+ rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+ rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case V8HImode:
+ gen_load_even = gen_vec_setv8hi;
+ gen_interleave_first_low = gen_vec_interleave_lowv4si;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ inner_mode = HImode;
+ first_imode = V4SImode;
+ second_imode = V2DImode;
+ third_imode = VOIDmode;
+ break;
+ case V16QImode:
+ gen_load_even = gen_vec_setv16qi;
+ gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+ gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ inner_mode = QImode;
+ first_imode = V8HImode;
+ second_imode = V4SImode;
+ third_imode = V2DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
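+ /* A sketch of the pipeline below for V16QImode (called with n == 8):
+ the first loop packs each byte pair into the low 16 bits of one of 8
+ V8HImode registers; interleaving lows then merges them into 4
+ V4SImode registers, next into 2 V2DImode registers, and a final
+ interleave forms the full 16-byte vector. */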
+
+ for (i = 0; i < n; i++)
+ {
+ /* Extend the odd element to SImode using a paradoxical SUBREG. */
+ op0 = gen_reg_rtx (SImode);
+ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
+
+ /* Insert the SImode value as low element of V4SImode vector. */
+ op1 = gen_reg_rtx (V4SImode);
+ op0 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode,
+ op0),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+ /* Cast the V4SImode vector back to a vector in original mode. */
+ op0 = gen_reg_rtx (mode);
+ emit_move_insn (op0, gen_lowpart (mode, op1));
+
+ /* Load even elements into the second position. */
+ emit_insn ((*gen_load_even) (op0,
+ force_reg (inner_mode,
+ ops [i + i + 1]),
+ const1_rtx));
+
+ /* Cast vector to FIRST_IMODE vector. */
+ ops[i] = gen_reg_rtx (first_imode);
+ emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+ }
+
+ /* Interleave low FIRST_IMODE vectors. */
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (first_imode);
+ emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
+
+ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
+ ops[j] = gen_reg_rtx (second_imode);
+ emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+ }
+
+ /* Interleave low SECOND_IMODE vectors. */
+ switch (second_imode)
+ {
+ case V4SImode:
+ for (i = j = 0; i < n / 2; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[i],
+ ops[i + 1]));
+
+ /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+ vector. */
+ ops[j] = gen_reg_rtx (third_imode);
+ emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+ }
+ second_imode = V2DImode;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ /* FALLTHRU */
+
+ case V2DImode:
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[0],
+ ops[1]));
+
+ /* Cast the SECOND_IMODE vector back to a vector in the original
+ mode. */
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_lowpart (mode, op0)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init. Handle the most general case:
+ all values variable, and none identical. */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals)
+{
+ rtx ops[32], op0, op1;
+ enum machine_mode half_mode = VOIDmode;
+ int n, i;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok && !TARGET_SSE)
+ break;
+ /* FALLTHRU */
+
+ case V8SFmode:
+ case V8SImode:
+ case V4DFmode:
+ case V4DImode:
+ case V4SFmode:
+ case V4SImode:
+ case V2DFmode:
+ case V2DImode:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_concat (mode, target, ops, n);
+ return;
+
+ case V32QImode:
+ half_mode = V16QImode;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ goto half;
+
+half:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ op0 = gen_reg_rtx (half_mode);
+ op1 = gen_reg_rtx (half_mode);
+ ix86_expand_vector_init_interleave (half_mode, op0, ops,
+ n >> 2);
+ ix86_expand_vector_init_interleave (half_mode, op1,
+ &ops [n >> 1], n >> 2);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, op0, op1)));
+ return;
+
+ case V16QImode:
+ if (!TARGET_SSE4_1)
+ break;
+ /* FALLTHRU */
+
+ case V8HImode:
+ if (!TARGET_SSE2)
+ break;
+
+ /* Don't use ix86_expand_vector_init_interleave if we can't
+ move from GPR to SSE register directly. */
+ if (!TARGET_INTER_UNIT_MOVES)
+ break;
+
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+ return;
+
+ case V4HImode:
+ case V8QImode:
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ {
+ int i, j, n_elts, n_words, n_elt_per_word;
+ enum machine_mode inner_mode;
+ rtx words[4], shift;
+
+ inner_mode = GET_MODE_INNER (mode);
+ n_elts = GET_MODE_NUNITS (mode);
+ n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
+ n_elt_per_word = n_elts / n_words;
+ shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
+
+ for (i = 0; i < n_words; ++i)
+ {
+ rtx word = NULL_RTX;
+
+ for (j = 0; j < n_elt_per_word; ++j)
+ {
+ rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
+ elt = convert_modes (word_mode, inner_mode, elt, true);
+
+ if (j == 0)
+ word = elt;
+ else
+ {
+ word = expand_simple_binop (word_mode, ASHIFT, word, shift,
+ word, 1, OPTAB_LIB_WIDEN);
+ word = expand_simple_binop (word_mode, IOR, word, elt,
+ word, 1, OPTAB_LIB_WIDEN);
+ }
+ }
+
+ words[i] = word;
+ }
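+ /* E.g. for V4HImode {a, b, c, d} on a 32-bit target this builds
+ word 0 as (b << 16) | a and word 1 as (d << 16) | c, keeping the
+ elements in little-endian order within each word. */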
+
+ if (n_words == 1)
+ emit_move_insn (target, gen_lowpart (mode, words[0]));
+ else if (n_words == 2)
+ {
+ rtx tmp = gen_reg_rtx (mode);
+ emit_clobber (tmp);
+ emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
+ emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
+ emit_move_insn (target, tmp);
+ }
+ else if (n_words == 4)
+ {
+ rtx tmp = gen_reg_rtx (V4SImode);
+ gcc_assert (word_mode == SImode);
+ vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
+ ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
+ emit_move_insn (target, gen_lowpart (mode, tmp));
+ }
+ else
+ gcc_unreachable ();
+ }
+}
+
+/* Initialize vector TARGET via VALS. Suppress the use of MMX
+ instructions unless MMX_OK is true. */
+
+void
+ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int n_var = 0, one_var = -1;
+ bool all_same = true, all_const_zero = true;
+ int i;
+ rtx x;
+
+ for (i = 0; i < n_elts; ++i)
+ {
+ x = XVECEXP (vals, 0, i);
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
+ n_var++, one_var = i;
+ else if (x != CONST0_RTX (inner_mode))
+ all_const_zero = false;
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ /* Constants are best loaded from the constant pool. */
+ if (n_var == 0)
+ {
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ return;
+ }
+
+ /* If all values are identical, broadcast the value. */
+ if (all_same
+ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
+ XVECEXP (vals, 0, 0)))
+ return;
+
+ /* Values where only one field is non-constant are best loaded from
+ the pool and overwritten via move later. */
+ if (n_var == 1)
+ {
+ if (all_const_zero
+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
+ XVECEXP (vals, 0, one_var),
+ one_var))
+ return;
+
+ if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
+ return;
+ }
+
+ ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
+}
+
+void
+ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ enum machine_mode half_mode;
+ bool use_vec_merge = false;
+ rtx tmp;
+ static rtx (*gen_extract[6][2]) (rtx, rtx)
+ = {
+ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
+ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
+ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
+ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
+ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
+ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
+ };
+ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
+ = {
+ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
+ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
+ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
+ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
+ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
+ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
+ };
+ int i, j, n;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (mmx_ok)
+ {
+ tmp = gen_reg_rtx (GET_MODE_INNER (mode));
+ ix86_expand_vector_extract (true, tmp, target, 1 - elt);
+ if (elt == 0)
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ return;
+ }
+ break;
+
+ case V2DImode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ case V2DFmode:
+ {
+ rtx op0, op1;
+
+ /* For the two element vectors, we implement a VEC_CONCAT with
+ the extraction of the other element. */
+
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
+
+ if (elt == 0)
+ op0 = val, op1 = tmp;
+ else
+ op0 = tmp, op1 = val;
+
+ tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ return;
+
+ case V4SFmode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ switch (elt)
+ {
+ case 0:
+ use_vec_merge = true;
+ break;
+
+ case 1:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* target = A A B B */
+ emit_insn (gen_sse_unpcklps (target, target, target));
+ /* target = X A B B */
+ ix86_expand_vector_set (false, target, val, 0);
+ /* target = A X C D */
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
+ GEN_INT (1), GEN_INT (0),
+ GEN_INT (2+4), GEN_INT (3+4)));
+ return;
+
+ case 2:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B X D */
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
+ GEN_INT (0), GEN_INT (1),
+ GEN_INT (0+4), GEN_INT (3+4)));
+ return;
+
+ case 3:
+ /* tmp = target = A B C D */
+ tmp = copy_to_reg (target);
+ /* tmp = X B C D */
+ ix86_expand_vector_set (false, tmp, val, 0);
+ /* target = A B C X */
+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
+ GEN_INT (0), GEN_INT (1),
+ GEN_INT (2+4), GEN_INT (0+4)));
+ return;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case V4SImode:
+ use_vec_merge = TARGET_SSE4_1;
+ if (use_vec_merge)
+ break;
+
+ /* Element 0 handled by vec_merge below. */
+ if (elt == 0)
+ {
+ use_vec_merge = true;
+ break;
+ }
+
+ if (TARGET_SSE2)
+ {
+ /* With SSE2, use integer shuffles to swap element 0 and ELT,
+ store into element 0, then shuffle them back. */
+
+ rtx order[4];
+
+ order[0] = GEN_INT (elt);
+ order[1] = const1_rtx;
+ order[2] = const2_rtx;
+ order[3] = GEN_INT (3);
+ order[elt] = const0_rtx;
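+ /* E.g. for elt == 2 this yields order == {2, 1, 0, 3}: the first
+ pshufd swaps elements 0 and 2, the vector_set below writes VAL into
+ element 0, and the second pshufd (the same permutation, which is
+ self-inverse) swaps them back. */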
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+
+ ix86_expand_vector_set (false, target, val, 0);
+
+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
+ order[1], order[2], order[3]));
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
+ gen_lowpart (SFmode, val), elt);
+ }
+ return;
+
+ case V8HImode:
+ use_vec_merge = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ use_vec_merge = TARGET_SSE4_1;
+ break;
+
+ case V8QImode:
+ break;
+
+ case V32QImode:
+ half_mode = V16QImode;
+ j = 0;
+ n = 16;
+ goto half;
+
+ case V16HImode:
+ half_mode = V8HImode;
+ j = 1;
+ n = 8;
+ goto half;
+
+ case V8SImode:
+ half_mode = V4SImode;
+ j = 2;
+ n = 4;
+ goto half;
+
+ case V4DImode:
+ half_mode = V2DImode;
+ j = 3;
+ n = 2;
+ goto half;
+
+ case V8SFmode:
+ half_mode = V4SFmode;
+ j = 4;
+ n = 4;
+ goto half;
+
+ case V4DFmode:
+ half_mode = V2DFmode;
+ j = 5;
+ n = 2;
+ goto half;
+
+half:
+ /* Compute offset. */
+ i = elt / n;
+ elt %= n;
+
+ gcc_assert (i <= 1);
+
+ /* Extract the half. */
+ tmp = gen_reg_rtx (half_mode);
+ emit_insn ((*gen_extract[j][i]) (tmp, target));
+
+ /* Put val in tmp at elt. */
+ ix86_expand_vector_set (false, tmp, val, elt);
+
+ /* Put it back. */
+ emit_insn ((*gen_insert[j][i]) (target, target, tmp));
+ return;
+
+ default:
+ break;
+ }
+
+ if (use_vec_merge)
+ {
+ tmp = gen_rtx_VEC_DUPLICATE (mode, val);
+ tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, target);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (tmp, val);
+
+ emit_move_insn (target, mem);
+ }
+}
+
+void
+ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
+{
+ enum machine_mode mode = GET_MODE (vec);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ bool use_vec_extr = false;
+ rtx tmp;
+
+ switch (mode)
+ {
+ case V2SImode:
+ case V2SFmode:
+ if (!mmx_ok)
+ break;
+ /* FALLTHRU */
+
+ case V2DFmode:
+ case V2DImode:
+ use_vec_extr = true;
+ break;
+
+ case V4SFmode:
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt+4), GEN_INT (elt+4)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse_unpckhps (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ break;
+
+ case V4SImode:
+ use_vec_extr = TARGET_SSE4_1;
+ if (use_vec_extr)
+ break;
+
+ if (TARGET_SSE2)
+ {
+ switch (elt)
+ {
+ case 0:
+ tmp = vec;
+ break;
+
+ case 1:
+ case 3:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_pshufd_1 (tmp, vec,
+ GEN_INT (elt), GEN_INT (elt),
+ GEN_INT (elt), GEN_INT (elt)));
+ break;
+
+ case 2:
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ vec = tmp;
+ use_vec_extr = true;
+ elt = 0;
+ }
+ else
+ {
+ /* For SSE1, we have to reuse the V4SF code. */
+ ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
+ gen_lowpart (V4SFmode, vec), elt);
+ return;
+ }
+ break;
+
+ case V8HImode:
+ use_vec_extr = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
+ break;
+
+ case V16QImode:
+ use_vec_extr = TARGET_SSE4_1;
+ break;
+
+ case V8QImode:
+ /* ??? Could extract the appropriate HImode element and shift. */
+ default:
+ break;
+ }
+
+ if (use_vec_extr)
+ {
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
+ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
+
+ /* Let the rtl optimizers know about the zero extension performed. */
+ if (inner_mode == QImode || inner_mode == HImode)
+ {
+ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
+ target = gen_lowpart (SImode, target);
+ }
+
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ }
+ else
+ {
+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
+
+ emit_move_insn (mem, vec);
+
+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
+ emit_move_insn (target, tmp);
+ }
+}
+
+/* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
+ pattern to reduce; DEST is the destination; IN is the input vector. */
+
+void
+ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+{
+ rtx tmp1, tmp2, tmp3;
+
+ tmp1 = gen_reg_rtx (V4SFmode);
+ tmp2 = gen_reg_rtx (V4SFmode);
+ tmp3 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse_movhlps (tmp1, in, in));
+ emit_insn (fn (tmp2, tmp1, in));
+
+ emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
+ GEN_INT (1), GEN_INT (1),
+ GEN_INT (1+4), GEN_INT (1+4)));
+ emit_insn (fn (dest, tmp2, tmp3));
+}
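+/* E.g. reducing {a, b, c, d} with an addition FN: movhlps yields
+ {c, d, c, d}; the first FN gives {a+c, b+d, ...}; the shufps
+ broadcasts element 1, b+d; and the final FN leaves the total
+ a+c+b+d in element 0 of DEST. (Illustrative dataflow only.) */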
+
+/* Target hook for scalar_mode_supported_p. */
+static bool
+ix86_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return true;
+ else if (mode == TFmode)
+ return true;
+ else
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Implements target hook vector_mode_supported_p. */
+static bool
+ix86_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+ return true;
+ if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
+ return true;
+ if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
+ return true;
+ if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+ return true;
+ if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+ return true;
+ return false;
+}
+
+/* Target hook for c_mode_for_suffix. */
+static enum machine_mode
+ix86_c_mode_for_suffix (char suffix)
+{
+ if (suffix == 'q')
+ return TFmode;
+ if (suffix == 'w')
+ return XFmode;
+
+ return VOIDmode;
+}
+
+/* Worker function for TARGET_MD_ASM_CLOBBERS.
+
+ We do this in the new i386 backend to maintain source compatibility
+ with the old cc0-based compiler. */
+
+static tree
+ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
+ tree inputs ATTRIBUTE_UNUSED,
+ tree clobbers)
+{
+ clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
+ clobbers);
+ clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
+ clobbers);
+ return clobbers;
+}
+
+/* Implements the target hook targetm.asm.encode_section_info. This
+ is not used by NetWare. */
+
+static void ATTRIBUTE_UNUSED
+ix86_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == VAR_DECL
+ && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
+ && ix86_in_large_data_p (decl))
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
+}
+
+/* Worker function for REVERSE_CONDITION. */
+
+enum rtx_code
+ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
+{
+ return (mode != CCFPmode && mode != CCFPUmode
+ ? reverse_condition (code)
+ : reverse_condition_maybe_unordered (code));
+}
+
+/* Output code to perform an x87 FP register move, from OPERANDS[1]
+ to OPERANDS[0]. */
+
+const char *
+output_387_reg_move (rtx insn, rtx *operands)
+{
+ if (REG_P (operands[0]))
+ {
+ if (REG_P (operands[1])
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ {
+ if (REGNO (operands[0]) == FIRST_STACK_REG)
+ return output_387_ffreep (operands, 0);
+ return "fstp\t%y0";
+ }
+ if (STACK_TOP_P (operands[0]))
+ return "fld%z1\t%y1";
+ return "fst\t%y0";
+ }
+ else if (MEM_P (operands[0]))
+ {
+ gcc_assert (REG_P (operands[1]));
+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+ return "fstp%z0\t%y0";
+ else
+ {
+ /* There is no non-popping store to memory for XFmode.
+ So if we need one, follow the store with a load. */
+ if (GET_MODE (operands[0]) == XFmode)
+ return "fstp%z0\t%y0\n\tfld%z0\t%y0";
+ else
+ return "fst%z0\t%y0";
+ }
+ }
+ else
+ gcc_unreachable();
+}
+
+/* Output code to perform a conditional jump to LABEL, if the C2 flag in
+ the FP status register is set. */
+
+void
+ix86_emit_fp_unordered_jump (rtx label)
+{
+ rtx reg = gen_reg_rtx (HImode);
+ rtx temp;
+
+ emit_insn (gen_x86_fnstsw_1 (reg));
+
+ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
+ {
+ emit_insn (gen_x86_sahf_1 (reg));
+
+ temp = gen_rtx_REG (CCmode, FLAGS_REG);
+ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
+ }
+ else
+ {
+ emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
+
+ temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
+ }
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
+
+ emit_jump_insn (temp);
+ predict_jump (REG_BR_PROB_BASE * 10 / 100);
+}
+
+/* Output code to perform a log1p XFmode calculation. */
+
+void ix86_emit_i387_log1p (rtx op0, rtx op1)
+{
+ rtx label1 = gen_label_rtx ();
+ rtx label2 = gen_label_rtx ();
+
+ rtx tmp = gen_reg_rtx (XFmode);
+ rtx tmp2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_absxf2 (tmp, op1));
+ emit_insn (gen_cmpxf (tmp,
+ CONST_DOUBLE_FROM_REAL_VALUE (
+ REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
+ XFmode)));
+ emit_jump_insn (gen_bge (label1));
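+ /* The threshold 0.2928... above is 1 - sqrt(2)/2, the bound on |x|
+ within which fyl2xp1 is specified to be accurate; for larger inputs
+ we fall back to computing log2(1 + x) directly with fyl2x below. */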
+
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
+ emit_jump (label2);
+
+ emit_label (label1);
+ emit_move_insn (tmp, CONST1_RTX (XFmode));
+ emit_insn (gen_addxf3 (tmp, op1, tmp));
+ emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
+ emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
+
+ emit_label (label2);
+}
+
+/* Output code to perform a Newton-Raphson approximation of a single precision
+ floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
+
+void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
+{
+ rtx x0, x1, e0, e1, two;
+
+ x0 = gen_reg_rtx (mode);
+ e0 = gen_reg_rtx (mode);
+ e1 = gen_reg_rtx (mode);
+ x1 = gen_reg_rtx (mode);
+
+ two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
+
+ if (VECTOR_MODE_P (mode))
+ two = ix86_build_const_vector (SFmode, true, two);
+
+ two = force_reg (mode, two);
+
+ /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
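+ /* One Newton-Raphson step roughly doubles the number of good bits:
+ if x0 = (1/b)*(1 + e), then x1 = x0*(2 - b*x0) = (1/b)*(1 - e*e).
+ The rcpps estimate is accurate to about 12 bits, so a single step
+ reaches roughly the 24-bit SFmode precision. */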
+
+ /* x0 = rcp(b) estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
+ UNSPEC_RCP)));
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (VOIDmode, e0,
+ gen_rtx_MULT (mode, x0, b)));
+ /* e1 = 2. - e0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e1,
+ gen_rtx_MINUS (mode, two, e0)));
+ /* x1 = x0 * e1 */
+ emit_insn (gen_rtx_SET (VOIDmode, x1,
+ gen_rtx_MULT (mode, x0, e1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_MULT (mode, a, x1)));
+}
+
+/* Output code to perform a Newton-Raphson approximation of a
+ single precision floating point [reciprocal] square root. */
+
+void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
+ bool recip)
+{
+ rtx x0, e0, e1, e2, e3, mthree, mhalf;
+ REAL_VALUE_TYPE r;
+
+ x0 = gen_reg_rtx (mode);
+ e0 = gen_reg_rtx (mode);
+ e1 = gen_reg_rtx (mode);
+ e2 = gen_reg_rtx (mode);
+ e3 = gen_reg_rtx (mode);
+
+ real_from_integer (&r, VOIDmode, -3, -1, 0);
+ mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
+
+ real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
+ mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
+
+ if (VECTOR_MODE_P (mode))
+ {
+ mthree = ix86_build_const_vector (SFmode, true, mthree);
+ mhalf = ix86_build_const_vector (SFmode, true, mhalf);
+ }
+
+ /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
+ rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
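+ /* Both follow from the Newton-Raphson step for f(x) = 1/(x*x) - a,
+ x1 = x0*(3 - a*x0*x0)/2, rewritten with the constants negated so the
+ two variants share the (a*x0*x0 - 3) factor; as with rcpps, one step
+ refines the ~12-bit rsqrtss estimate to near SFmode precision. */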
+
+ /* x0 = rsqrt(a) estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
+ UNSPEC_RSQRT)));
+
+ /* If a == 0.0, mask out the rsqrt estimate (which is infinite) so
+ that sqrt(0.0) computes as 0.0 rather than as 0.0 * inf == NaN. */
+ if (!recip)
+ {
+ rtx zero, mask;
+
+ mask = gen_reg_rtx (mode);
+
+ zero = force_reg (mode, CONST0_RTX (mode));
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, zero, a)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+ }
+
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (VOIDmode, e0,
+ gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * x0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e1,
+ gen_rtx_MULT (mode, e0, x0)));
+
+ /* e2 = e1 - 3. */
+ mthree = force_reg (mode, mthree);
+ emit_insn (gen_rtx_SET (VOIDmode, e2,
+ gen_rtx_PLUS (mode, e1, mthree)));
+
+ mhalf = force_reg (mode, mhalf);
+ if (recip)
+ /* e3 = -.5 * x0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e3,
+ gen_rtx_MULT (mode, x0, mhalf)));
+ else
+ /* e3 = -.5 * e0 */
+ emit_insn (gen_rtx_SET (VOIDmode, e3,
+ gen_rtx_MULT (mode, e0, mhalf)));
+ /* ret = e2 * e3 */
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_MULT (mode, e2, e3)));
+}
+
+/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
+
+static void ATTRIBUTE_UNUSED
+i386_solaris_elf_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ /* With Binutils 2.15, the "@unwind" marker must be specified on
+ every occurrence of the ".eh_frame" section, not just the first
+ one. */
+ if (TARGET_64BIT
+ && strcmp (name, ".eh_frame") == 0)
+ {
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
+ flags & SECTION_WRITE ? "aw" : "a");
+ return;
+ }
+ default_elf_asm_named_section (name, flags, decl);
+}
+
+/* Return the mangling of TYPE if it is an extended fundamental type. */
+
+static const char *
+ix86_mangle_type (const_tree type)
+{
+ type = TYPE_MAIN_VARIANT (type);
+
+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
+ return NULL;
+
+ switch (TYPE_MODE (type))
+ {
+ case TFmode:
+ /* __float128 is "g". */
+ return "g";
+ case XFmode:
+ /* "long double" or __float80 is "e". */
+ return "e";
+ default:
+ return NULL;
+ }
+}
+
+/* For 32-bit code we can save PIC register setup by using
+ __stack_chk_fail_local hidden function instead of calling
+ __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
+ register, so it is better to call __stack_chk_fail directly. */
+
+static tree
+ix86_stack_protect_fail (void)
+{
+ return TARGET_64BIT
+ ? default_external_stack_protect_fail ()
+ : default_hidden_stack_protect_fail ();
+}
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such, I
+ guess we'll have to see. */
+int
+asm_preferred_eh_data_format (int code, int global)
+{
+ if (flag_pic)
+ {
+ int type = DW_EH_PE_sdata8;
+ if (!TARGET_64BIT
+ || ix86_cmodel == CM_SMALL_PIC
+ || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
+ type = DW_EH_PE_sdata4;
+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
+ }
+ if (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM && code))
+ return DW_EH_PE_udata4;
+ return DW_EH_PE_absptr;
+}
+
+/* Expand copysign from SIGN to the positive value ABS_VALUE, storing the
+ result in RESULT. If MASK is non-null, it is a mask that masks out
+ the sign-bit. */
+static void
+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
+{
+ enum machine_mode mode = GET_MODE (sign);
+ rtx sgn = gen_reg_rtx (mode);
+ if (mask == NULL_RTX)
+ {
+ mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
+ if (!VECTOR_MODE_P (mode))
+ {
+ /* We need to generate a scalar mode mask in this case. */
+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+ mask = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+ }
+ }
+ else
+ mask = gen_rtx_NOT (mode, mask);
+ emit_insn (gen_rtx_SET (VOIDmode, sgn,
+ gen_rtx_AND (mode, mask, sign)));
+ emit_insn (gen_rtx_SET (VOIDmode, result,
+ gen_rtx_IOR (mode, abs_value, sgn)));
+}
+
+/* Expand fabs (OP0) and return a new rtx that holds the result. The
+ mask for masking out the sign-bit is stored in *SMASK, if that is
+ non-null. */
+static rtx
+ix86_expand_sse_fabs (rtx op0, rtx *smask)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx xa, mask;
+
+ xa = gen_reg_rtx (mode);
+ mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
+ if (!VECTOR_MODE_P (mode))
+ {
+ /* We need to generate a scalar mode mask in this case. */
+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
+ mask = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, xa,
+ gen_rtx_AND (mode, op0, mask)));
+
+ if (smask)
+ *smask = mask;
+
+ return xa;
+}
+
+/* Expands a comparison of OP0 with OP1 using comparison code CODE,
+ swapping the operands if SWAP_OPERANDS is true. The expanded
+ code is a forward jump to a newly created label in case the
+ comparison is true. The generated label rtx is returned. */
+static rtx
+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
+ bool swap_operands)
+{
+ rtx label, tmp;
+
+ if (swap_operands)
+ {
+ tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ label = gen_label_rtx ();
+ tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+ tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ return label;
+}
+
+/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
+ using comparison code CODE. Operands are swapped for the comparison if
+ SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
+static rtx
+ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
+ bool swap_operands)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx mask = gen_reg_rtx (mode);
+
+ if (swap_operands)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ if (mode == DFmode)
+ emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
+ gen_rtx_fmt_ee (code, mode, op0, op1)));
+ else
+ emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
+ gen_rtx_fmt_ee (code, mode, op0, op1)));
+
+ return mask;
+}
+
+/* Generate and return a rtx of mode MODE for 2**n where n is the number
+ of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
+static rtx
+ix86_gen_TWO52 (enum machine_mode mode)
+{
+ REAL_VALUE_TYPE TWO52r;
+ rtx TWO52;
+
+ real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
+ TWO52 = const_double_from_real_value (TWO52r, mode);
+ TWO52 = force_reg (mode, TWO52);
+
+ return TWO52;
+}
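+/* The expanders below rely on the classic rounding trick: for |xa|
+ smaller than 2**52 (DFmode; 2**23 for SFmode), xa + TWO52 - TWO52
+ pushes the fraction bits out of the significand, so the result is xa
+ rounded to an integer in the current (round-to-nearest-even) mode.
+ E.g. 2.6 + 2**52 is representable only as 2**52 + 3, and subtracting
+ 2**52 leaves 3.0. */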
+
+/* Expand SSE sequence for computing lround from OP1 storing
+ into OP0. */
+void
+ix86_expand_lround (rtx op0, rtx op1)
+{
+ /* C code for the stuff we're doing below:
+ tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
+ return (long)tmp;
+ */
+ enum machine_mode mode = GET_MODE (op1);
+ const struct real_format *fmt;
+ REAL_VALUE_TYPE pred_half, half_minus_pred_half;
+ rtx adj;
+
+ /* load nextafter (0.5, 0.0) */
+ fmt = REAL_MODE_FORMAT (mode);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
+ REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
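+ /* Using nextafter (0.5, 0.0) instead of 0.5 avoids a wrong result
+ for the largest double below 0.5: 0.49999999999999994 + 0.5 rounds
+ up to exactly 1.0, so lround would return 1 instead of 0. With the
+ predecessor of 0.5 the sum stays below 1.0. */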
+
+ /* adj = copysign (0.5, op1) */
+ adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
+ ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
+
+ /* adj = op1 + adj */
+ adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* op0 = (imode)adj */
+ expand_fix (op0, adj, 0);
+}
+
+/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
+ storing into OPERAND0. */
+void
+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
+{
+ /* C code for the stuff we're doing below (for do_floor):
+ xi = (long)op1;
+ xi -= (double)xi > op1 ? 1 : 0;
+ return xi;
+ */
+ enum machine_mode fmode = GET_MODE (op1);
+ enum machine_mode imode = GET_MODE (op0);
+ rtx ireg, freg, label, tmp;
+
+ /* reg = (long)op1 */
+ ireg = gen_reg_rtx (imode);
+ expand_fix (ireg, op1, 0);
+
+ /* freg = (double)reg */
+ freg = gen_reg_rtx (fmode);
+ expand_float (freg, ireg, 0);
+
+ /* ireg = (freg > op1) ? ireg - 1 : ireg */
+ label = ix86_expand_sse_compare_and_jump (UNLE,
+ freg, op1, !do_floor);
+ tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
+ ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (ireg, tmp);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (op0, ireg);
+}
+
+/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
+ result in OPERAND0. */
+void
+ix86_expand_rint (rtx operand0, rtx operand1)
+{
+ /* C code for the stuff we're doing below:
+ xa = fabs (operand1);
+ if (!isless (xa, 2**52))
+ return operand1;
+ xa = xa + 2**52 - 2**52;
+ return copysign (xa, operand1);
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx res, xa, label, TWO52, mask;
+
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ TWO52 = ix86_gen_TWO52 (mode);
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+
+ ix86_sse_copysign_to_positive (res, xa, res, mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
+{
+ /* C code for the stuff we expand below.
+ double xa = fabs (x), x2;
+ if (!isless (xa, TWO52))
+ return x;
+ xa = xa + TWO52 - TWO52;
+ x2 = copysign (xa, x);
+ Compensate. Floor:
+ if (x2 > x)
+ x2 -= 1;
+ Compensate. Ceil:
+ if (x2 < x)
+ x2 -= -1;
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, TWO52, tmp, label, one, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* xa = xa + TWO52 - TWO52; */
+ xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+
+ /* xa = copysign (xa, operand1) */
+ ix86_sse_copysign_to_positive (xa, xa, res, mask);
+
+ /* generate 1.0 or -1.0 */
+ one = force_reg (mode,
+ const_double_from_real_value (do_floor
+ ? dconst1 : dconstm1, mode));
+
+ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
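+ /* The comparison mask is all-ones where xa > operand1, so ANDing it
+ with 1.0 (or -1.0 for ceil) yields exactly the needed adjustment.
+ E.g. floor (-2.5): xa rounds to -2.0, the mask is all-ones, and
+ -2.0 - 1.0 gives -3.0. For ceil the comparison operands are swapped
+ and ONE is -1.0, so the subtraction adds 1. */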
+ /* We always need to subtract here to preserve signed zero. */
+ tmp = expand_simple_binop (mode, MINUS,
+ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
+{
+ /* C code for the stuff we expand below.
+ double xa = fabs (x), x2;
+ if (!isless (xa, TWO52))
+ return x;
+ x2 = (double)(long)x;
+ Compensate. Floor:
+ if (x2 > x)
+ x2 -= 1;
+ Compensate. Ceil:
+ if (x2 < x)
+ x2 += 1;
+ if (HONOR_SIGNED_ZEROS (mode))
+ return copysign (x2, x);
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, xi, TWO52, tmp, label, one, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* xa = (double)(long)x */
+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+ expand_fix (xi, res, 0);
+ expand_float (xa, xi, 0);
+
+ /* generate 1.0 */
+ one = force_reg (mode, const_double_from_real_value (dconst1, mode));
+
+ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
+ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ if (HONOR_SIGNED_ZEROS (mode))
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing round from OPERAND1 storing
+ into OPERAND0. Sequence that works without relying on DImode truncation
+ via cvttsd2siq that is only available on 64bit targets. */
+void
+ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
+{
+ /* C code for the stuff we expand below.
+ double xa = fabs (x), xa2, x2;
+ if (!isless (xa, TWO52))
+ return x;
+ Using the absolute value and copying back sign makes
+ -0.0 -> -0.0 correct.
+ xa2 = xa + TWO52 - TWO52;
+ Compensate.
+ dxa = xa2 - xa;
+ if (dxa <= -0.5)
+ xa2 += 1;
+ else if (dxa > 0.5)
+ xa2 -= 1;
+ x2 = copysign (xa2, x);
+ return x2;
+ */
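+ /* The dxa compensation turns the round-to-even result of the TWO52
+ trick into the round-half-away-from-zero behavior that round()
+ requires. E.g. for x == 2.5: xa2 == 2.0 (ties go to even),
+ dxa == -0.5, and the dxa <= -0.5 test bumps xa2 up to 3.0. */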
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* xa2 = xa + TWO52 - TWO52; */
+ xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
+
+ /* dxa = xa2 - xa; */
+ dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* generate 0.5, 1.0 and -0.5 */
+ half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
+ one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
+ mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
+ 0, OPTAB_DIRECT);
+
+ /* Compensate. */
+ tmp = gen_reg_rtx (mode);
+ /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+ /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
+ tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_AND (mode, one, tmp)));
+ xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* res = copysign (xa2, operand1) */
+ ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing trunc from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_trunc (rtx operand0, rtx operand1)
+{
+ /* C code for SSE variant we expand below.
+ double xa = fabs (x), x2;
+ if (!isless (xa, TWO52))
+ return x;
+ x2 = (double)(long)x;
+ if (HONOR_SIGNED_ZEROS (mode))
+ return copysign (x2, x);
+ return x2;
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, xi, TWO52, label, res, mask;
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &mask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* x = (double)(long)x */
+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+ expand_fix (xi, res, 0);
+ expand_float (res, xi, 0);
+
+ if (HONOR_SIGNED_ZEROS (mode))
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing trunc from OPERAND1 storing
+ into OPERAND0. Sequence that works without relying on DImode truncation
+ via cvttsd2siq that is only available on 64bit targets. */
+void
+ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
+{
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx xa, mask, TWO52, label, one, res, smask, tmp;
+
+ /* C code for SSE variant we expand below.
+ double xa = fabs (x), xa2, x2;
+ if (!isless (xa, TWO52))
+ return x;
+ xa2 = xa + TWO52 - TWO52;
+ Compensate:
+ if (xa2 > xa)
+ xa2 -= 1.0;
+ x2 = copysign (xa2, x);
+ return x2;
+ */
+
+ TWO52 = ix86_gen_TWO52 (mode);
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ /* xa = abs (operand1) */
+ xa = ix86_expand_sse_fabs (res, &smask);
+
+ /* if (!isless (xa, TWO52)) goto label; */
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* res = xa + TWO52 - TWO52; */
+ tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
+ tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ /* generate 1.0 */
+ one = force_reg (mode, const_double_from_real_value (dconst1, mode));
+
+ /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
+ mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_AND (mode, mask, one)));
+ tmp = expand_simple_binop (mode, MINUS,
+ res, mask, NULL_RTX, 0, OPTAB_DIRECT);
+ emit_move_insn (res, tmp);
+
+ /* res = copysign (res, operand1) */
+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+/* Expand SSE sequence for computing round from OPERAND1 storing
+ into OPERAND0. */
+void
+ix86_expand_round (rtx operand0, rtx operand1)
+{
+ /* C code for the stuff we're doing below:
+ double xa = fabs (x);
+ if (!isless (xa, TWO52))
+ return x;
+ xa = (double)(long)(xa + nextafter (0.5, 0.0));
+ return copysign (xa, x);
+ */
+ enum machine_mode mode = GET_MODE (operand0);
+ rtx res, TWO52, xa, label, xi, half, mask;
+ const struct real_format *fmt;
+ REAL_VALUE_TYPE pred_half, half_minus_pred_half;
+
+ /* Temporary for holding the result, initialized to the input
+ operand to ease control flow. */
+ res = gen_reg_rtx (mode);
+ emit_move_insn (res, operand1);
+
+ TWO52 = ix86_gen_TWO52 (mode);
+ xa = ix86_expand_sse_fabs (res, &mask);
+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
+
+ /* load nextafter (0.5, 0.0) */
+ fmt = REAL_MODE_FORMAT (mode);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
+ REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
+
+ /* xa = xa + 0.5 */
+ half = force_reg (mode, const_double_from_real_value (pred_half, mode));
+ xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* xa = (double)(int64_t)xa */
+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
+ expand_fix (xi, xa, 0);
+ expand_float (xa, xi, 0);
+
+ /* res = copysign (xa, operand1) */
+ ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operand0, res);
+}
+
+
+/* Check whether an SSE5 instruction's combination of operands is valid.
+ OPERANDS is the array of operands.
+ NUM is the number of operands.
+ USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
+ NUM_MEMORY is the maximum number of memory operands to accept.
+ When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
+
+bool
+ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
+ bool uses_oc0, int num_memory, bool commutative)
+{
+ int mem_mask;
+ int mem_count;
+ int i;
+
+ /* Count the number of memory arguments */
+ mem_mask = 0;
+ mem_count = 0;
+ for (i = 0; i < num; i++)
+ {
+ enum machine_mode mode = GET_MODE (operands[i]);
+ if (register_operand (operands[i], mode))
+ ;
+
+ else if (memory_operand (operands[i], mode))
+ {
+ mem_mask |= (1 << i);
+ mem_count++;
+ }
+
+ else
+ {
+ rtx pattern = PATTERN (insn);
+
+ /* allow 0 for pcmov */
+ if (GET_CODE (pattern) != SET
+ || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
+ || i < 2
+ || operands[i] != CONST0_RTX (mode))
+ return false;
+ }
+ }
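+ /* mem_mask now has bit I set exactly when operand I is a memory
+ reference, e.g. "fmaddss xmm1, xmm1, xmm2, mem" gives
+ mem_mask == (1 << 3) and mem_count == 1. */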
+
+ /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
+ a memory operation. */
+ if (num_memory < 0)
+ {
+ num_memory = -num_memory;
+ if ((mem_mask & (1 << (num-1))) != 0)
+ {
+ mem_mask &= ~(1 << (num-1));
+ mem_count--;
+ }
+ }
+
+ /* If there were no memory operations, allow the insn */
+ if (mem_mask == 0)
+ return true;
+
+ /* Do not allow the destination register to be a memory operand. */
+ else if (mem_mask & (1 << 0))
+ return false;
+
+ /* If there are too many memory operations, disallow the instruction.
+ While the hardware only allows one memory reference, before register
+ allocation we sometimes accept two memory operands for certain insns
+ so that code like the following can be optimized:
+
+ float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
+
+ or similar cases that are vectorized into using the fmaddss
+ instruction. */
+ else if (mem_count > num_memory)
+ return false;
+
+ /* Don't allow more than one memory operation if not optimizing. */
+ else if (mem_count > 1 && !optimize)
+ return false;
+
+ else if (num == 4 && mem_count == 1)
+ {
+ /* formats (destination is the first argument), example fmaddss:
+ xmm1, xmm1, xmm2, xmm3/mem
+ xmm1, xmm1, xmm2/mem, xmm3
+ xmm1, xmm2, xmm3/mem, xmm1
+ xmm1, xmm2/mem, xmm3, xmm1 */
+ if (uses_oc0)
+ return ((mem_mask == (1 << 1))
+ || (mem_mask == (1 << 2))
+ || (mem_mask == (1 << 3)));
+
+ /* format, example pmacsdd:
+ xmm1, xmm2, xmm3/mem, xmm1 */
+ if (commutative)
+ return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
+ else
+ return (mem_mask == (1 << 2));
+ }
+
+ else if (num == 4 && num_memory == 2)
+ {
+ /* If there are two memory operations, we can load one of the memory ops
+ into the destination register. This is for optimizing the
+ multiply/add ops, where the combiner has given both the multiply
+ and the add insns a memory operand. We have to be careful
+ that the destination doesn't overlap with the inputs. */
+ rtx op0 = operands[0];
+
+ if (reg_mentioned_p (op0, operands[1])
+ || reg_mentioned_p (op0, operands[2])
+ || reg_mentioned_p (op0, operands[3]))
+ return false;
+
+ /* formats (destination is the first argument), example fmaddss:
+ xmm1, xmm1, xmm2, xmm3/mem
+ xmm1, xmm1, xmm2/mem, xmm3
+ xmm1, xmm2, xmm3/mem, xmm1
+ xmm1, xmm2/mem, xmm3, xmm1
+
+ For the oc0 case, we will load either operands[1] or operands[3] into
+ operands[0], so any combination of 2 memory operands is ok. */
+ if (uses_oc0)
+ return true;
+
+ /* format, example pmacsdd:
+ xmm1, xmm2, xmm3/mem, xmm1
+
+ For the integer multiply/add instructions be more restrictive and
+ require operands[2] and operands[3] to be the memory operands. */
+ if (commutative)
+ return (mem_mask == ((1 << 1) | (1 << 3))
+ || mem_mask == ((1 << 2) | (1 << 3)));
+ else
+ return (mem_mask == ((1 << 2) | (1 << 3)));
+ }
+
+ else if (num == 3 && num_memory == 1)
+ {
+ /* formats, example protb:
+ xmm1, xmm2, xmm3/mem
+ xmm1, xmm2/mem, xmm3 */
+ if (uses_oc0)
+ return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
+
+ /* format, example comeq:
+ xmm1, xmm2, xmm3/mem */
+ else
+ return (mem_mask == (1 << 2));
+ }
+
+ else
+ gcc_unreachable ();
+
+ return false;
+}
+
+
+/* Fix up an SSE5 instruction that has two memory input references, putting
+ it into a form the hardware will allow by using the destination register
+ to load one of the memory operands. Presently this is used by the
+ multiply/add routines to allow two memory references. */
+
+void
+ix86_expand_sse5_multiple_memory (rtx operands[],
+ int num,
+ enum machine_mode mode)
+{
+ rtx op0 = operands[0];
+ if (num != 4
+ || memory_operand (op0, mode)
+ || reg_mentioned_p (op0, operands[1])
+ || reg_mentioned_p (op0, operands[2])
+ || reg_mentioned_p (op0, operands[3]))
+ gcc_unreachable ();
+
+ /* For 2 memory operands, pick either operands[1] or operands[3] to move into
+ the destination register. */
+ if (memory_operand (operands[1], mode))
+ {
+ emit_move_insn (op0, operands[1]);
+ operands[1] = op0;
+ }
+ else if (memory_operand (operands[3], mode))
+ {
+ emit_move_insn (op0, operands[3]);
+ operands[3] = op0;
+ }
+ else
+ gcc_unreachable ();
+
+ return;
+}
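+
+/* A sketch of the effect (illustrative; the exact RTL depends on the
+ pattern being expanded): when operands[1] is a MEM, the sequence
+
+ emit_move_insn (op0, operands[1]); operands[1] = op0;
+
+ turns a multiply/add with two memory inputs into a plain register
+ load followed by a multiply/add with a single memory input, which is
+ the form the validity check above accepts. */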
+
+
+/* Table of valid machine attributes. */
+static const struct attribute_spec ix86_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Stdcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Fastcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Cdecl attribute says the callee is a normal C declaration */
+ { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Regparm attribute specifies how many integer arguments are to be
+ passed in registers. */
+ { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
+ /* Sseregparm attribute says we are using x86_64 calling conventions
+ for FP arguments. */
+ { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* force_align_arg_pointer says this function realigns the stack at entry. */
+ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
+ false, true, true, ix86_handle_cconv_attribute },
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
+ { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
+#endif
+ { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+ { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+#ifdef SUBTARGET_ATTRIBUTE_TABLE
+ SUBTARGET_ATTRIBUTE_TABLE,
+#endif
+ /* ms_abi and sysv_abi calling convention function attributes. */
+ { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
+ { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
+ /* End element. */
+ { NULL, 0, 0, false, false, false, NULL }
+};
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+x86_builtin_vectorization_cost (bool runtime_test)
+{
+ /* If the branch of the runtime test is taken - i.e., the vectorized
+ version is skipped - this incurs a misprediction cost (because the
+ vectorized version is expected to be the fall-through). So we subtract
+ the latency of a mispredicted branch from the costs that are incurred
+ when the vectorized version is executed.
+
+ TODO: The values in individual target tables have to be tuned or new
+ fields may be needed. For example, on K8, the default branch path is the
+ not-taken path. If the taken path is predicted correctly, the minimum
+ penalty of going down the taken-path is 1 cycle. If the taken-path is
+ not predicted correctly, then the minimum penalty is 10 cycles. */
+
+ if (runtime_test)
+ {
+ return (-(ix86_cost->cond_taken_branch_cost));
+ }
+ else
+ return 0;
+}
+
+/* This function returns the calling-ABI-specific va_list type node,
+ i.e. the va_list type appropriate for FNDECL. */
+
+tree
+ix86_fn_abi_va_list (tree fndecl)
+{
+ int abi;
+
+ if (!TARGET_64BIT)
+ return va_list_type_node;
+ gcc_assert (fndecl != NULL_TREE);
+ abi = ix86_function_abi ((const_tree) fndecl);
+
+ if (abi == MS_ABI)
+ return ms_va_list_type_node;
+ else
+ return sysv_va_list_type_node;
+}
+
+/* Returns the canonical va_list type specified by TYPE. If no
+ valid TYPE is provided, it returns NULL_TREE. */
+
+tree
+ix86_canonical_va_list_type (tree type)
+{
+ tree wtype, htype;
+
+ /* Resolve references and pointers to va_list type. */
+ if (INDIRECT_REF_P (type))
+ type = TREE_TYPE (type);
+ else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
+ type = TREE_TYPE (type);
+
+ if (TARGET_64BIT)
+ {
+ wtype = va_list_type_node;
+ gcc_assert (wtype != NULL_TREE);
+ htype = type;
+ if (TREE_CODE (wtype) == ARRAY_TYPE)
+ {
+ /* If va_list is an array type, the argument may have decayed
+ to a pointer type, e.g. by being passed to another function.
+ In that case, unwrap both types so that we can compare the
+ underlying records. */
+ if (TREE_CODE (htype) == ARRAY_TYPE
+ || POINTER_TYPE_P (htype))
+ {
+ wtype = TREE_TYPE (wtype);
+ htype = TREE_TYPE (htype);
+ }
+ }
+ if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
+ return va_list_type_node;
+ wtype = sysv_va_list_type_node;
+ gcc_assert (wtype != NULL_TREE);
+ htype = type;
+ if (TREE_CODE (wtype) == ARRAY_TYPE)
+ {
+ /* If va_list is an array type, the argument may have decayed
+ to a pointer type, e.g. by being passed to another function.
+ In that case, unwrap both types so that we can compare the
+ underlying records. */
+ if (TREE_CODE (htype) == ARRAY_TYPE
+ || POINTER_TYPE_P (htype))
+ {
+ wtype = TREE_TYPE (wtype);
+ htype = TREE_TYPE (htype);
+ }
+ }
+ if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
+ return sysv_va_list_type_node;
+ wtype = ms_va_list_type_node;
+ gcc_assert (wtype != NULL_TREE);
+ htype = type;
+ if (TREE_CODE (wtype) == ARRAY_TYPE)
+ {
+ /* If va_list is an array type, the argument may have decayed
+ to a pointer type, e.g. by being passed to another function.
+ In that case, unwrap both types so that we can compare the
+ underlying records. */
+ if (TREE_CODE (htype) == ARRAY_TYPE
+ || POINTER_TYPE_P (htype))
+ {
+ wtype = TREE_TYPE (wtype);
+ htype = TREE_TYPE (htype);
+ }
+ }
+ if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
+ return ms_va_list_type_node;
+ return NULL_TREE;
+ }
+ return std_canonical_va_list_type (type);
+}
+
+/* Iterate through the target-specific builtin types for va_list.
+ IDX denotes the iterator, *PTREE is set to the result type of
+ the va_list builtin, and *PNAME to its internal type.
+ Returns zero if there is no element for this index, otherwise
+ IDX should be increased upon the next call.
+ Note that this does not enumerate a base builtin's name, such as
+ __builtin_va_list.
+ Used from c_common_nodes_and_builtins. */
+
+int
+ix86_enum_va_list (int idx, const char **pname, tree *ptree)
+{
+ if (!TARGET_64BIT)
+ return 0;
+ switch (idx) {
+ case 0:
+ *ptree = ms_va_list_type_node;
+ *pname = "__builtin_ms_va_list";
+ break;
+ case 1:
+ *ptree = sysv_va_list_type_node;
+ *pname = "__builtin_sysv_va_list";
+ break;
+ default:
+ return 0;
+ }
+ return 1;
+}
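+
+/* A sketch of a typical caller (hypothetical; a caller iterates until
+ zero is returned, increasing IDX after each successful call):
+
+ const char *name;
+ tree type;
+ int idx;
+ for (idx = 0; ix86_enum_va_list (idx, &name, &type); idx++)
+ record_builtin (name, type);
+
+ where record_builtin stands in for whatever the caller does with each
+ (name, type) pair. */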
+
+/* Initialize the GCC target structure. */
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+# undef TARGET_MERGE_DECL_ATTRIBUTES
+# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#endif
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS ix86_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+ ix86_builtin_vectorized_function
+
+#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
+#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
+
+#undef TARGET_BUILTIN_RECIPROCAL
+#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
+
+#undef TARGET_ENCODE_SECTION_INFO
+#ifndef SUBTARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
+#else
+#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
+#endif
+
+#undef TARGET_ASM_OPEN_PAREN
+#define TARGET_ASM_OPEN_PAREN ""
+#undef TARGET_ASM_CLOSE_PAREN
+#define TARGET_ASM_CLOSE_PAREN ""
+
+#undef TARGET_ASM_ALIGNED_HI_OP
+#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
+#undef TARGET_ASM_ALIGNED_SI_OP
+#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
+#ifdef ASM_QUAD
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
+#endif
+
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
+ ia32_multipass_dfa_lookahead
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
+#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
+#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
+
+#if TARGET_MACHO
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
+#endif
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
+#undef TARGET_BINDS_LOCAL_P
+#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
+#endif
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START x86_file_start
+
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS \
+ (TARGET_DEFAULT \
+ | TARGET_SUBTARGET_DEFAULT \
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
+
+#undef TARGET_HANDLE_OPTION
+#define TARGET_HANDLE_OPTION ix86_handle_option
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS ix86_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST ix86_address_cost
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
+#undef TARGET_CC_MODES_COMPATIBLE
+#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
+
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+
+#undef TARGET_FN_ABI_VA_LIST
+#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
+
+#undef TARGET_CANONICAL_VA_LIST_TYPE
+#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
+
+#undef TARGET_MD_ASM_CLOBBERS
+#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
+#undef TARGET_INTERNAL_ARG_POINTER
+#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
+#undef TARGET_UPDATE_STACK_BOUNDARY
+#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
+#undef TARGET_GET_DRAP_RTX
+#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
+
+#undef TARGET_C_MODE_FOR_SUFFIX
+#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
+#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
+#endif
+
+#ifdef SUBTARGET_INSERT_ATTRIBUTES
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
+#endif
+
+#undef TARGET_MANGLE_TYPE
+#define TARGET_MANGLE_TYPE ix86_mangle_type
+
+#undef TARGET_STACK_PROTECT_FAIL
+#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE ix86_function_value
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
+
+#undef TARGET_SET_CURRENT_FUNCTION
+#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
+
+#undef TARGET_OPTION_VALID_ATTRIBUTE_P
+#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
+
+#undef TARGET_OPTION_SAVE
+#define TARGET_OPTION_SAVE ix86_function_specific_save
+
+#undef TARGET_OPTION_RESTORE
+#define TARGET_OPTION_RESTORE ix86_function_specific_restore
+
+#undef TARGET_OPTION_PRINT
+#define TARGET_OPTION_PRINT ix86_function_specific_print
+
+#undef TARGET_OPTION_CAN_INLINE_P
+#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
+
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-i386.h"
diff --git a/gcc-4.4.0/gcc/config/i386/i386.h b/gcc-4.4.0/gcc/config/i386/i386.h
new file mode 100644
index 000000000..5c05d9dac
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386.h
@@ -0,0 +1,2552 @@
+/* Definitions of target machine for GCC for IA-32.
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The purpose of this file is to define the characteristics of the i386,
+ independent of assembler syntax or operating system.
+
+ Three other files build on this one to describe a specific assembler syntax:
+ bsd386.h, att386.h, and sun386.h.
+
+ The actual tm.h file for a particular system should include
+ this file, and then the file for the appropriate assembler syntax.
+
+ Many macros that specify assembler syntax are omitted entirely from
+ this file because they really belong in the files for particular
+ assemblers. These include RP, IP, LPREFIX, PUT_OP_SIZE, USE_STAR,
+ ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many
+ that start with ASM_ or end in ASM_OP. */
+
+/* Redefines for option macros. */
+
+#define TARGET_64BIT OPTION_ISA_64BIT
+#define TARGET_MMX OPTION_ISA_MMX
+#define TARGET_3DNOW OPTION_ISA_3DNOW
+#define TARGET_3DNOW_A OPTION_ISA_3DNOW_A
+#define TARGET_SSE OPTION_ISA_SSE
+#define TARGET_SSE2 OPTION_ISA_SSE2
+#define TARGET_SSE3 OPTION_ISA_SSE3
+#define TARGET_SSSE3 OPTION_ISA_SSSE3
+#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
+#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
+#define TARGET_AVX OPTION_ISA_AVX
+#define TARGET_FMA OPTION_ISA_FMA
+#define TARGET_SSE4A OPTION_ISA_SSE4A
+#define TARGET_SSE5 OPTION_ISA_SSE5
+#define TARGET_ROUND OPTION_ISA_ROUND
+#define TARGET_ABM OPTION_ISA_ABM
+#define TARGET_POPCNT OPTION_ISA_POPCNT
+#define TARGET_SAHF OPTION_ISA_SAHF
+#define TARGET_AES OPTION_ISA_AES
+#define TARGET_PCLMUL OPTION_ISA_PCLMUL
+#define TARGET_CMPXCHG16B OPTION_ISA_CX16
+
+
+/* SSE5 and SSE4.1 define the same round instructions */
+#define OPTION_MASK_ISA_ROUND (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE5)
+#define OPTION_ISA_ROUND ((ix86_isa_flags & OPTION_MASK_ISA_ROUND) != 0)
+
+#include "config/vxworks-dummy.h"
+
+/* Algorithm to use when expanding a string function. */
+enum stringop_alg
+{
+ no_stringop,
+ libcall,
+ rep_prefix_1_byte,
+ rep_prefix_4_byte,
+ rep_prefix_8_byte,
+ loop_1_byte,
+ loop,
+ unrolled_loop
+};
+
+#define NAX_STRINGOP_ALGS 4
+
+/* Specify what algorithm to use for stringops of known size.
+ When the size is unknown, the UNKNOWN_SIZE alg is used. When the size is
+ known at compile time or estimated via feedback, the SIZE array
+ is walked in order until MAX is greater than the estimate (or -1,
+ meaning infinity); the corresponding ALG is then used.
+ For example, the initializer:
+ {{256, loop}, {-1, rep_prefix_4_byte}}
+ will use a loop for blocks smaller than or equal to 256 bytes; the rep
+ prefix will be used otherwise. */
+struct stringop_algs
+{
+ const enum stringop_alg unknown_size;
+ const struct stringop_strategy {
+ const int max;
+ const enum stringop_alg alg;
+ } size [NAX_STRINGOP_ALGS];
+};
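+
+/* A sketch of the walk described above (a hypothetical helper, not the
+ actual decision code; <= is used so that the example initializer
+ behaves as documented):
+
+ static enum stringop_alg
+ pick_stringop_alg (const struct stringop_algs *algs, int expected_size)
+ {
+ int i;
+ for (i = 0; i < NAX_STRINGOP_ALGS; i++)
+ if (algs->size[i].max == -1 || expected_size <= algs->size[i].max)
+ return algs->size[i].alg;
+ return algs->unknown_size;
+ } */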
+
+/* Define the specific costs for a given cpu */
+
+struct processor_costs {
+ const int add; /* cost of an add instruction */
+ const int lea; /* cost of a lea instruction */
+ const int shift_var; /* variable shift costs */
+ const int shift_const; /* constant shift costs */
+ const int mult_init[5]; /* cost of starting a multiply
+ in QImode, HImode, SImode, DImode, TImode*/
+ const int mult_bit; /* cost of multiply per each bit set */
+ const int divide[5]; /* cost of a divide/mod
+ in QImode, HImode, SImode, DImode, TImode*/
+ int movsx; /* The cost of movsx operation. */
+ int movzx; /* The cost of movzx operation. */
+ const int large_insn; /* insns larger than this cost more */
+ const int move_ratio; /* The threshold of number of scalar
+ memory-to-memory move insns. */
+ const int movzbl_load; /* cost of loading using movzbl */
+ const int int_load[3]; /* cost of loading integer registers
+ in QImode, HImode and SImode relative
+ to reg-reg move (2). */
+ const int int_store[3]; /* cost of storing integer register
+ in QImode, HImode and SImode */
+ const int fp_move; /* cost of reg,reg fld/fst */
+ const int fp_load[3]; /* cost of loading FP register
+ in SFmode, DFmode and XFmode */
+ const int fp_store[3]; /* cost of storing FP register
+ in SFmode, DFmode and XFmode */
+ const int mmx_move; /* cost of moving MMX register. */
+ const int mmx_load[2]; /* cost of loading MMX register
+ in SImode and DImode */
+ const int mmx_store[2]; /* cost of storing MMX register
+ in SImode and DImode */
+ const int sse_move; /* cost of moving SSE register. */
+ const int sse_load[3]; /* cost of loading SSE register
+ in SImode, DImode and TImode*/
+ const int sse_store[3]; /* cost of storing SSE register
+ in SImode, DImode and TImode*/
+ const int mmxsse_to_integer; /* cost of moving mmxsse register to
+ integer and vice versa. */
+ const int l1_cache_size; /* size of l1 cache, in kilobytes. */
+ const int l2_cache_size; /* size of l2 cache, in kilobytes. */
+ const int prefetch_block; /* bytes moved to cache for prefetch. */
+ const int simultaneous_prefetches; /* number of parallel prefetch
+ operations. */
+ const int branch_cost; /* Default value for BRANCH_COST. */
+ const int fadd; /* cost of FADD and FSUB instructions. */
+ const int fmul; /* cost of FMUL instruction. */
+ const int fdiv; /* cost of FDIV instruction. */
+ const int fabs; /* cost of FABS instruction. */
+ const int fchs; /* cost of FCHS instruction. */
+ const int fsqrt; /* cost of FSQRT instruction. */
+ /* Specify what algorithm
+ to use for stringops on unknown size. */
+ struct stringop_algs memcpy[2], memset[2];
+ const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
+ load and store. */
+ const int scalar_load_cost; /* Cost of scalar load. */
+ const int scalar_store_cost; /* Cost of scalar store. */
+ const int vec_stmt_cost; /* Cost of any vector operation, excluding
+ load, store, vector-to-scalar and
+ scalar-to-vector operation. */
+ const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */
+ const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
+ const int vec_store_cost; /* Cost of vector store. */
+ const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
+ cost model. */
+ const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+ vectorizer cost model. */
+};
+
+extern const struct processor_costs *ix86_cost;
+extern const struct processor_costs ix86_size_cost;
+
+#define ix86_cur_cost() \
+ (optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost)
+
+/* Macros used in the machine description to test the flags. */
+
+/* configure can arrange to make this 2, to force a 486. */
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_generic
+#endif
+
+#ifndef TARGET_FPMATH_DEFAULT
+#define TARGET_FPMATH_DEFAULT \
+ (TARGET_64BIT && TARGET_SSE ? FPMATH_SSE : FPMATH_387)
+#endif
+
+#define TARGET_FLOAT_RETURNS_IN_80387 TARGET_FLOAT_RETURNS
+
+/* 64bit Sledgehammer mode. For libgcc2 we make sure this is a
+ compile-time constant. */
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __x86_64__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#else
+#ifndef TARGET_BI_ARCH
+#undef TARGET_64BIT
+#if TARGET_64BIT_DEFAULT
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+#endif
+
+#define HAS_LONG_COND_BRANCH 1
+#define HAS_LONG_UNCOND_BRANCH 1
+
+#define TARGET_386 (ix86_tune == PROCESSOR_I386)
+#define TARGET_486 (ix86_tune == PROCESSOR_I486)
+#define TARGET_PENTIUM (ix86_tune == PROCESSOR_PENTIUM)
+#define TARGET_PENTIUMPRO (ix86_tune == PROCESSOR_PENTIUMPRO)
+#define TARGET_GEODE (ix86_tune == PROCESSOR_GEODE)
+#define TARGET_K6 (ix86_tune == PROCESSOR_K6)
+#define TARGET_ATHLON (ix86_tune == PROCESSOR_ATHLON)
+#define TARGET_PENTIUM4 (ix86_tune == PROCESSOR_PENTIUM4)
+#define TARGET_K8 (ix86_tune == PROCESSOR_K8)
+#define TARGET_ATHLON_K8 (TARGET_K8 || TARGET_ATHLON)
+#define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA)
+#define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2)
+#define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
+#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
+#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
+#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
+
+/* Feature tests against the various tunings. */
+enum ix86_tune_indices {
+ X86_TUNE_USE_LEAVE,
+ X86_TUNE_PUSH_MEMORY,
+ X86_TUNE_ZERO_EXTEND_WITH_AND,
+ X86_TUNE_UNROLL_STRLEN,
+ X86_TUNE_DEEP_BRANCH_PREDICTION,
+ X86_TUNE_BRANCH_PREDICTION_HINTS,
+ X86_TUNE_DOUBLE_WITH_ADD,
+ X86_TUNE_USE_SAHF,
+ X86_TUNE_MOVX,
+ X86_TUNE_PARTIAL_REG_STALL,
+ X86_TUNE_PARTIAL_FLAG_REG_STALL,
+ X86_TUNE_USE_HIMODE_FIOP,
+ X86_TUNE_USE_SIMODE_FIOP,
+ X86_TUNE_USE_MOV0,
+ X86_TUNE_USE_CLTD,
+ X86_TUNE_USE_XCHGB,
+ X86_TUNE_SPLIT_LONG_MOVES,
+ X86_TUNE_READ_MODIFY_WRITE,
+ X86_TUNE_READ_MODIFY,
+ X86_TUNE_PROMOTE_QIMODE,
+ X86_TUNE_FAST_PREFIX,
+ X86_TUNE_SINGLE_STRINGOP,
+ X86_TUNE_QIMODE_MATH,
+ X86_TUNE_HIMODE_MATH,
+ X86_TUNE_PROMOTE_QI_REGS,
+ X86_TUNE_PROMOTE_HI_REGS,
+ X86_TUNE_ADD_ESP_4,
+ X86_TUNE_ADD_ESP_8,
+ X86_TUNE_SUB_ESP_4,
+ X86_TUNE_SUB_ESP_8,
+ X86_TUNE_INTEGER_DFMODE_MOVES,
+ X86_TUNE_PARTIAL_REG_DEPENDENCY,
+ X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
+ X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL,
+ X86_TUNE_SSE_SPLIT_REGS,
+ X86_TUNE_SSE_TYPELESS_STORES,
+ X86_TUNE_SSE_LOAD0_BY_PXOR,
+ X86_TUNE_MEMORY_MISMATCH_STALL,
+ X86_TUNE_PROLOGUE_USING_MOVE,
+ X86_TUNE_EPILOGUE_USING_MOVE,
+ X86_TUNE_SHIFT1,
+ X86_TUNE_USE_FFREEP,
+ X86_TUNE_INTER_UNIT_MOVES,
+ X86_TUNE_INTER_UNIT_CONVERSIONS,
+ X86_TUNE_FOUR_JUMP_LIMIT,
+ X86_TUNE_SCHEDULE,
+ X86_TUNE_USE_BT,
+ X86_TUNE_USE_INCDEC,
+ X86_TUNE_PAD_RETURNS,
+ X86_TUNE_EXT_80387_CONSTANTS,
+ X86_TUNE_SHORTEN_X87_SSE,
+ X86_TUNE_AVOID_VECTOR_DECODE,
+ X86_TUNE_PROMOTE_HIMODE_IMUL,
+ X86_TUNE_SLOW_IMUL_IMM32_MEM,
+ X86_TUNE_SLOW_IMUL_IMM8,
+ X86_TUNE_MOVE_M1_VIA_OR,
+ X86_TUNE_NOT_UNPAIRABLE,
+ X86_TUNE_NOT_VECTORMODE,
+ X86_TUNE_USE_VECTOR_FP_CONVERTS,
+ X86_TUNE_USE_VECTOR_CONVERTS,
+ X86_TUNE_FUSE_CMP_AND_BRANCH,
+
+ X86_TUNE_LAST
+};
+
+extern unsigned char ix86_tune_features[X86_TUNE_LAST];
+
+#define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]
+#define TARGET_PUSH_MEMORY ix86_tune_features[X86_TUNE_PUSH_MEMORY]
+#define TARGET_ZERO_EXTEND_WITH_AND \
+ ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
+#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
+#define TARGET_DEEP_BRANCH_PREDICTION \
+ ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION]
+#define TARGET_BRANCH_PREDICTION_HINTS \
+ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
+#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
+#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
+#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
+#define TARGET_PARTIAL_FLAG_REG_STALL \
+ ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
+#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
+#define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0]
+#define TARGET_USE_CLTD ix86_tune_features[X86_TUNE_USE_CLTD]
+#define TARGET_USE_XCHGB ix86_tune_features[X86_TUNE_USE_XCHGB]
+#define TARGET_SPLIT_LONG_MOVES ix86_tune_features[X86_TUNE_SPLIT_LONG_MOVES]
+#define TARGET_READ_MODIFY_WRITE ix86_tune_features[X86_TUNE_READ_MODIFY_WRITE]
+#define TARGET_READ_MODIFY ix86_tune_features[X86_TUNE_READ_MODIFY]
+#define TARGET_PROMOTE_QImode ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
+#define TARGET_FAST_PREFIX ix86_tune_features[X86_TUNE_FAST_PREFIX]
+#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
+#define TARGET_QIMODE_MATH ix86_tune_features[X86_TUNE_QIMODE_MATH]
+#define TARGET_HIMODE_MATH ix86_tune_features[X86_TUNE_HIMODE_MATH]
+#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
+#define TARGET_PROMOTE_HI_REGS ix86_tune_features[X86_TUNE_PROMOTE_HI_REGS]
+#define TARGET_ADD_ESP_4 ix86_tune_features[X86_TUNE_ADD_ESP_4]
+#define TARGET_ADD_ESP_8 ix86_tune_features[X86_TUNE_ADD_ESP_8]
+#define TARGET_SUB_ESP_4 ix86_tune_features[X86_TUNE_SUB_ESP_4]
+#define TARGET_SUB_ESP_8 ix86_tune_features[X86_TUNE_SUB_ESP_8]
+#define TARGET_INTEGER_DFMODE_MOVES \
+ ix86_tune_features[X86_TUNE_INTEGER_DFMODE_MOVES]
+#define TARGET_PARTIAL_REG_DEPENDENCY \
+ ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
+#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
+ ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
+#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
+ ix86_tune_features[X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL]
+#define TARGET_SSE_SPLIT_REGS ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
+#define TARGET_SSE_TYPELESS_STORES \
+ ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
+#define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
+#define TARGET_MEMORY_MISMATCH_STALL \
+ ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
+#define TARGET_PROLOGUE_USING_MOVE \
+ ix86_tune_features[X86_TUNE_PROLOGUE_USING_MOVE]
+#define TARGET_EPILOGUE_USING_MOVE \
+ ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
+#define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1]
+#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]
+#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
+#define TARGET_INTER_UNIT_CONVERSIONS \
+ ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
+#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
+#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
+#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
+#define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]
+#define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_EXT_80387_CONSTANTS \
+ ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
+#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
+#define TARGET_AVOID_VECTOR_DECODE \
+ ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
+#define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
+ ix86_tune_features[X86_TUNE_PROMOTE_HIMODE_IMUL]
+#define TARGET_SLOW_IMUL_IMM32_MEM \
+ ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM32_MEM]
+#define TARGET_SLOW_IMUL_IMM8 ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM8]
+#define TARGET_MOVE_M1_VIA_OR ix86_tune_features[X86_TUNE_MOVE_M1_VIA_OR]
+#define TARGET_NOT_UNPAIRABLE ix86_tune_features[X86_TUNE_NOT_UNPAIRABLE]
+#define TARGET_NOT_VECTORMODE ix86_tune_features[X86_TUNE_NOT_VECTORMODE]
+#define TARGET_USE_VECTOR_FP_CONVERTS \
+ ix86_tune_features[X86_TUNE_USE_VECTOR_FP_CONVERTS]
+#define TARGET_USE_VECTOR_CONVERTS \
+ ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
+#define TARGET_FUSE_CMP_AND_BRANCH \
+ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
+
+/* Feature tests against the various architecture variations. */
+enum ix86_arch_indices {
+ X86_ARCH_CMOVE, /* || TARGET_SSE */
+ X86_ARCH_CMPXCHG,
+ X86_ARCH_CMPXCHG8B,
+ X86_ARCH_XADD,
+ X86_ARCH_BSWAP,
+
+ X86_ARCH_LAST
+};
+
+extern unsigned char ix86_arch_features[X86_ARCH_LAST];
+
+#define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE]
+#define TARGET_CMPXCHG ix86_arch_features[X86_ARCH_CMPXCHG]
+#define TARGET_CMPXCHG8B ix86_arch_features[X86_ARCH_CMPXCHG8B]
+#define TARGET_XADD ix86_arch_features[X86_ARCH_XADD]
+#define TARGET_BSWAP ix86_arch_features[X86_ARCH_BSWAP]
+
+#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
+
+extern int x86_prefetch_sse;
+
+#define TARGET_PREFETCH_SSE x86_prefetch_sse
+
+#define ASSEMBLER_DIALECT (ix86_asm_dialect)
+
+#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
+#define TARGET_MIX_SSE_I387 \
+ ((ix86_fpmath & (FPMATH_SSE | FPMATH_387)) == (FPMATH_SSE | FPMATH_387))
+
+#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
+#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
+
+extern int ix86_isa_flags;
+
+#ifndef TARGET_64BIT_DEFAULT
+#define TARGET_64BIT_DEFAULT 0
+#endif
+#ifndef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
+#endif
+
+/* Fence to use after loop using storent. */
+
+extern tree x86_mfence;
+#define FENCE_FOLLOWING_MOVNT x86_mfence
+
+/* Once GDB has been enhanced to deal with functions without frame
+ pointers, we can change this to allow for elimination of
+ the frame pointer in leaf functions. */
+#define TARGET_DEFAULT 0
+
+/* Extra bits to force. */
+#define TARGET_SUBTARGET_DEFAULT 0
+#define TARGET_SUBTARGET_ISA_DEFAULT 0
+
+/* Extra bits to force on w/ 32-bit mode. */
+#define TARGET_SUBTARGET32_DEFAULT 0
+#define TARGET_SUBTARGET32_ISA_DEFAULT 0
+
+/* Extra bits to force on w/ 64-bit mode. */
+#define TARGET_SUBTARGET64_DEFAULT 0
+#define TARGET_SUBTARGET64_ISA_DEFAULT 0
+
+/* This is not really a target flag, but is done this way so that
+ it's analogous to similar code for Mach-O on PowerPC. darwin.h
+ redefines this to 1. */
+#define TARGET_MACHO 0
+
+/* Likewise, for the Windows 64-bit ABI. */
+#define TARGET_64BIT_MS_ABI (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
+
+/* Available call abi. */
+enum calling_abi
+{
+ SYSV_ABI = 0,
+ MS_ABI = 1
+};
+
+/* The default abi form used by target. */
+#define DEFAULT_ABI SYSV_ABI
+
+/* Subtargets may reset this to 1 in order to enable 96-bit long double
+ with the rounding mode forced to 53 bits. */
+#define TARGET_96_ROUND_53_LONG_DOUBLE 0
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+#define OVERRIDE_OPTIONS override_options (true)
+
+/* Define this to change the optimizations performed by default. */
+#define OPTIMIZATION_OPTIONS(LEVEL, SIZE) \
+ optimization_options ((LEVEL), (SIZE))
+
+/* -march=native handling only makes sense with the compiler running on
+ an x86 or x86_64 chip. If changing this condition, also change
+ the condition in driver-i386.c. */
+#if defined(__i386__) || defined(__x86_64__)
+/* In driver-i386.c. */
+extern const char *host_detect_local_cpu (int argc, const char **argv);
+#define EXTRA_SPEC_FUNCTIONS \
+ { "local_cpu_detect", host_detect_local_cpu },
+#define HAVE_LOCAL_CPU_DETECT
+#endif
+
+#if TARGET_64BIT_DEFAULT
+#define OPT_ARCH64 "!m32"
+#define OPT_ARCH32 "m32"
+#else
+#define OPT_ARCH64 "m64"
+#define OPT_ARCH32 "!m64"
+#endif
+
+/* Support for configure-time defaults of some command line options.
+ The order here is important so that -march doesn't squash the
+ tune or cpu values. */
+#define OPTION_DEFAULT_SPECS \
+ {"tune", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"cpu_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"arch", "%{!march=*:-march=%(VALUE)}"}, \
+ {"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \
+ {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"},
+
+/* Specs for the compiler proper */
+
+#ifndef CC1_CPU_SPEC
+#define CC1_CPU_SPEC_1 "\
+%{mcpu=*:-mtune=%* \
+%n`-mcpu=' is deprecated. Use `-mtune=' or '-march=' instead.\n} \
+%<mcpu=* \
+%{mintel-syntax:-masm=intel \
+%n`-mintel-syntax' is deprecated. Use `-masm=intel' instead.\n} \
+%{mno-intel-syntax:-masm=att \
+%n`-mno-intel-syntax' is deprecated. Use `-masm=att' instead.\n}"
+
+#ifndef HAVE_LOCAL_CPU_DETECT
+#define CC1_CPU_SPEC CC1_CPU_SPEC_1
+#else
+#define CC1_CPU_SPEC CC1_CPU_SPEC_1 \
+"%{march=native:%<march=native %:local_cpu_detect(arch) \
+ %{!mtune=*:%<mtune=native %:local_cpu_detect(tune)}} \
+%{mtune=native:%<mtune=native %:local_cpu_detect(tune)}"
+#endif
+#endif
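+
+/* For illustration: with the spec above, -mcpu=k8 on the command line
+ is rewritten to -mtune=k8 and %n prints the deprecation notice, while
+ %<mcpu=* deletes the original option; when local CPU detection is
+ available, -march=native is deleted and replaced by the output of
+ host_detect_local_cpu. */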
+
+/* Target CPU builtins. */
+#define TARGET_CPU_CPP_BUILTINS() ix86_target_macros ()
+
+/* Target Pragmas. */
+#define REGISTER_TARGET_PRAGMAS() ix86_register_pragmas ()
+
+enum target_cpu_default
+{
+ TARGET_CPU_DEFAULT_generic = 0,
+
+ TARGET_CPU_DEFAULT_i386,
+ TARGET_CPU_DEFAULT_i486,
+ TARGET_CPU_DEFAULT_pentium,
+ TARGET_CPU_DEFAULT_pentium_mmx,
+ TARGET_CPU_DEFAULT_pentiumpro,
+ TARGET_CPU_DEFAULT_pentium2,
+ TARGET_CPU_DEFAULT_pentium3,
+ TARGET_CPU_DEFAULT_pentium4,
+ TARGET_CPU_DEFAULT_pentium_m,
+ TARGET_CPU_DEFAULT_prescott,
+ TARGET_CPU_DEFAULT_nocona,
+ TARGET_CPU_DEFAULT_core2,
+
+ TARGET_CPU_DEFAULT_geode,
+ TARGET_CPU_DEFAULT_k6,
+ TARGET_CPU_DEFAULT_k6_2,
+ TARGET_CPU_DEFAULT_k6_3,
+ TARGET_CPU_DEFAULT_athlon,
+ TARGET_CPU_DEFAULT_athlon_sse,
+ TARGET_CPU_DEFAULT_k8,
+ TARGET_CPU_DEFAULT_amdfam10,
+
+ TARGET_CPU_DEFAULT_max
+};
+
+#ifndef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) "
+#endif
+
+/* This macro defines names of additional specifications to put in the
+ specs that can be used in various specifications like CC1_SPEC. Its
+ definition is an initializer with a subgrouping for each command option.
+
+ Each subgrouping contains a string constant that defines the
+ specification name, and a string constant that is used by the GCC
+ driver program.
+
+ Do not define this macro if it does not need to do anything. */
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define EXTRA_SPECS \
+ { "cc1_cpu", CC1_CPU_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+
+/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
+ FPU, assume that the fpcw is set to extended precision; when using
+ only SSE, rounding is correct; when using both SSE and the FPU,
+ the rounding precision is indeterminate, since either may be chosen
+ apparently at random. */
+#define TARGET_FLT_EVAL_METHOD \
+ (TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 0 : 2)
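+
+/* For illustration, the expression above yields:
+ -mfpmath=sse,387 -> -1 (rounding precision indeterminate)
+ -mfpmath=sse -> 0 (evaluate to the range of the type)
+ -mfpmath=387 -> 2 (evaluate in long double precision) */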
+
+/* target machine storage layout */
+
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE 32
+#define FLOAT_TYPE_SIZE 32
+#define LONG_TYPE_SIZE BITS_PER_WORD
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_LONG_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 80
+
+#define WIDEST_HARDWARE_FP_SIZE LONG_DOUBLE_TYPE_SIZE
+
+#if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT
+#define MAX_BITS_PER_WORD 64
+#else
+#define MAX_BITS_PER_WORD 32
+#endif
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is true on the 80386. */
+
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+/* That is not true on the 80386. */
+#define BYTES_BIG_ENDIAN 0
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+/* Not true for 80386 */
+#define WORDS_BIG_ENDIAN 0
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#ifdef IN_LIBGCC2
+#define MIN_UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#else
+#define MIN_UNITS_PER_WORD 4
+#endif
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY BITS_PER_WORD
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY \
+ (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
+
+/* Stack boundary of the main function guaranteed by the OS. */
+#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
+
+/* Minimum stack boundary. */
+#define MIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
+
+/* Boundary (in *bits*) on which the stack pointer prefers to be
+ aligned; the compiler cannot rely on having this alignment. */
+#define PREFERRED_STACK_BOUNDARY ix86_preferred_stack_boundary
+
+/* It should be MIN_STACK_BOUNDARY. But we set it to 128 bits for
+ both 32bit and 64bit, to support code that needs 128-bit stack
+ alignment for SSE instructions but can't realign the stack. */
+#define PREFERRED_STACK_BOUNDARY_DEFAULT 128
+
+/* 1 if -mstackrealign should be turned on by default. It will
+ generate an alternate prologue and epilogue that realigns the
+ runtime stack if necessary. This supports mixing code that keeps a
+ 4-byte aligned stack, as specified by the i386 psABI, with code that
+ needs a 16-byte aligned stack, as required by SSE instructions. If
+ STACK_REALIGN_DEFAULT is 1 and PREFERRED_STACK_BOUNDARY_DEFAULT is
+ 128, stacks for all functions may be realigned. */
+#define STACK_REALIGN_DEFAULT 0
+
+/* Boundary (in *bits*) on which the incoming stack is aligned. */
+#define INCOMING_STACK_BOUNDARY ix86_incoming_stack_boundary
+
+/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
+ mandatory for the 64-bit ABI, and may or may not be true for other
+ operating systems. */
+#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
+
+/* Minimum allocation boundary for the code of a function. */
+#define FUNCTION_BOUNDARY 8
+
+/* C++ stores the virtual bit in the lowest bit of function pointers. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION ptrmemfunc_vbit_in_pfn
+
+/* Alignment of field after `int : 0' in a structure. */
+
+#define EMPTY_FIELD_BOUNDARY BITS_PER_WORD
+
+/* Minimum size in bits of the largest boundary to which any
+ and all fundamental data types supported by the hardware
+ might need to be aligned. No data type wants to be aligned
+ rounder than this.
+
+ Pentium+ prefers DFmode values to be aligned to 64 bit boundary
+ and Pentium Pro XFmode values at 128 bit boundaries. */
+
+#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256 : 128)
+
+/* Maximum stack alignment. */
+#define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT
+
+/* Alignment value for attribute ((aligned)). It is a constant since
+ it is the part of the ABI. We shouldn't change it with -mavx. */
+#define ATTRIBUTE_ALIGNED_VALUE 128
+
+/* Decide whether a variable of mode MODE should be 128 bit aligned. */
+#define ALIGN_MODE_128(MODE) \
+ ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
+
+/* The published ABIs say that doubles should be aligned on word
+ boundaries, so lower the alignment for structure fields unless
+ -malign-double is set. */
+
+/* ??? Blah -- this macro is used directly by libobjc. Since it
+ supports no vector modes, cut out the complexity and fall back
+ on BIGGEST_FIELD_ALIGNMENT. */
+#ifdef IN_TARGET_LIBS
+#ifdef __x86_64__
+#define BIGGEST_FIELD_ALIGNMENT 128
+#else
+#define BIGGEST_FIELD_ALIGNMENT 32
+#endif
+#else
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+ x86_field_alignment (FIELD, COMPUTED)
+#endif
+
+/* If defined, a C expression to compute the alignment given to a
+ constant that is being placed in memory. EXP is the constant
+ and ALIGN is the alignment that the object would ordinarily have.
+ The value of this macro is used instead of that alignment to align
+ the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ The typical use of this macro is to increase alignment for string
+ constants to be word aligned so that `strcpy' calls that copy
+ constants can be done inline. */
+
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) ix86_constant_alignment ((EXP), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a static
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. Another is to
+ cause character arrays to be word-aligned so that `strcpy' calls
+ that copy constants to character arrays can be done inline. */
+
+#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a local
+ variable. TYPE is the data type, and ALIGN is the alignment that
+ the object would ordinarily have. The value of this macro is used
+ instead of that alignment to align the object.
+
+ If this macro is not defined, then ALIGN is used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ ix86_local_alignment ((TYPE), VOIDmode, (ALIGN))
+
+/* If defined, a C expression to compute the alignment for stack slot.
+ TYPE is the data type, MODE is the widest mode available, and ALIGN
+ is the alignment that the slot would ordinarily have. The value of
+ this macro is used instead of that alignment to align the slot.
+
+ If this macro is not defined, then ALIGN is used when TYPE is NULL;
+ otherwise, LOCAL_ALIGNMENT will be used.
+
+ One use of this macro is to set alignment of stack slot to the
+ maximum alignment of all possible modes which the slot may have. */
+
+#define STACK_SLOT_ALIGNMENT(TYPE, MODE, ALIGN) \
+ ix86_local_alignment ((TYPE), (MODE), (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a local
+ variable DECL.
+
+ If this macro is not defined, then
+ LOCAL_ALIGNMENT (TREE_TYPE (DECL), DECL_ALIGN (DECL)) will be used.
+
+ One use of this macro is to increase alignment of medium-size
+ data to make it all fit in fewer cache lines. */
+
+#define LOCAL_DECL_ALIGNMENT(DECL) \
+ ix86_local_alignment ((DECL), VOIDmode, DECL_ALIGN (DECL))
+
+/* If defined, a C expression to compute the minimum required alignment
+ for dynamic stack realignment purposes for EXP (a TYPE or DECL),
+ MODE, assuming normal alignment ALIGN.
+
+ If this macro is not defined, then (ALIGN) will be used. */
+
+#define MINIMUM_ALIGNMENT(EXP, MODE, ALIGN) \
+ ix86_minimum_alignment (EXP, MODE, ALIGN)
+
+
+/* If defined, a C expression that gives the alignment boundary, in
+ bits, of an argument with the specified mode and type. If it is
+ not defined, `PARM_BOUNDARY' is used for all arguments. */
+
+#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \
+ ix86_function_arg_boundary ((MODE), (TYPE))
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 0
+
+/* If bit field type is int, don't let it cross an int,
+ and give entire struct the alignment of an int. */
+/* Required on the 386 since it doesn't have bit-field insns. */
+#define PCC_BITFIELD_TYPE_MATTERS 1
+
+/* Standard register usage. */
+
+/* This processor has special stack-like registers. See reg-stack.c
+ for details. */
+
+#define STACK_REGS
+
+#define IS_STACK_MODE(MODE) \
+ (((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH)) \
+ || ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \
+ || (MODE) == XFmode)
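+
+/* For illustration: with -msse2 -mfpmath=sse, neither SFmode nor
+ DFmode is a stack mode by the test above (both live in SSE
+ registers), while XFmode always is, since only the x87 stack
+ registers handle 80-bit floats. */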
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers.
+
+ In the 80386 we give the 8 general purpose registers the numbers 0-7.
+ We number the floating point registers 8-15.
+ Note that registers 0-7 can be accessed as a short or int,
+ while only 0-3 may be used with byte `mov' instructions.
+
+ Reg 16 does not correspond to any hardware register, but instead
+ appears in the RTL as an argument pointer prior to reload, and is
+ eliminated during reloading in favor of either the stack or frame
+ pointer. */
+
+#define FIRST_PSEUDO_REGISTER 53
+
+/* Number of hardware registers that go into the DWARF-2 unwind info.
+ If not defined, equals FIRST_PSEUDO_REGISTER. */
+
+#define DWARF_FRAME_REGISTERS 17
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+ On the 80386, the stack pointer is such, as is the arg pointer.
+
+ The value is zero if the register is not fixed on either 32 or
+ 64 bit targets, one if the register is fixed on both 32 and 64
+ bit targets, two if it is only fixed on 32-bit targets and three
+ if it is only fixed on 64-bit targets.
+ Proper values are computed in CONDITIONAL_REGISTER_USAGE.
+ */
+#define FIXED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, \
+/*arg,flags,fpsr,fpcr,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7*/ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 2, 2, 2, 2, 2, 2, 2, 2 }
+
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like.
+
+ The value is zero if the register is not call used on either 32 or
+ 64 bit targets, one if the register is call used on both 32 and 64
+ bit targets, two if it is only call used on 32-bit targets and three
+ if it is only call used on 64-bit targets.
+ Proper values are computed in CONDITIONAL_REGISTER_USAGE.
+*/
+#define CALL_USED_REGISTERS \
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7*/ \
+{ 1, 1, 1, 0, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+/*arg,flags,fpsr,fpcr,frame*/ \
+ 1, 1, 1, 1, 1, \
+/*xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
+ 1, 1, 1, 1, 2, 2, 2, 2, \
+/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
+
+/* Order in which to allocate registers. Each register must be
+ listed once, even those in FIXED_REGISTERS. List frame pointer
+ late and fixed registers last. Note that, in general, we prefer
+ registers listed in CALL_USED_REGISTERS, keeping the others
+ available for storage of persistent values.
+
+ The ORDER_REGS_FOR_LOCAL_ALLOC macro actually overwrites the order,
+ so this is just a trivial default initializer for the array. */
+
+#define REG_ALLOC_ORDER \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,\
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, \
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
+ 48, 49, 50, 51, 52 }
+
+/* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
+ to be rearranged based on a particular function. When using sse math,
+ we want to allocate SSE before x87 registers and vice versa. */
+
+#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
+
+
+#define OVERRIDE_ABI_FORMAT(FNDECL) ix86_call_abi_override (FNDECL)
+
+/* Macro to conditionally modify fixed_regs/call_used_regs. */
+#define CONDITIONAL_REGISTER_USAGE \
+do { \
+ int i; \
+ unsigned int j; \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ { \
+ if (fixed_regs[i] > 1) \
+ fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2)); \
+ if (call_used_regs[i] > 1) \
+ call_used_regs[i] = (call_used_regs[i] \
+ == (TARGET_64BIT ? 3 : 2)); \
+ } \
+ j = PIC_OFFSET_TABLE_REGNUM; \
+ if (j != INVALID_REGNUM) \
+ fixed_regs[j] = call_used_regs[j] = 1; \
+ if (TARGET_64BIT \
+ && ((cfun && cfun->machine->call_abi == MS_ABI) \
+ || (!cfun && DEFAULT_ABI == MS_ABI))) \
+ { \
+ call_used_regs[SI_REG] = 0; \
+ call_used_regs[DI_REG] = 0; \
+ call_used_regs[XMM6_REG] = 0; \
+ call_used_regs[XMM7_REG] = 0; \
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) \
+ call_used_regs[i] = 0; \
+ } \
+ if (! TARGET_MMX) \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ if (! TARGET_SSE) \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) \
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) \
+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) \
+ fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; \
+ if (! TARGET_64BIT) \
+ { \
+ for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) \
+ reg_names[i] = ""; \
+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) \
+ reg_names[i] = ""; \
+ } \
+ } while (0)
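+
+/* For illustration: an entry of 2 in FIXED_REGISTERS or
+ CALL_USED_REGISTERS ("only on 32-bit targets") is resolved above by
+ comparing against (TARGET_64BIT ? 3 : 2), so e.g. r8-r15, marked 2 in
+ FIXED_REGISTERS, come out fixed on 32-bit targets and freely
+ allocatable in 64-bit mode. */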
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ Actually there are no two-word move instructions for consecutive
+ registers, and only registers 0-3 may have byte mov instructions
+ applied to them.
+ */
+
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
+ : ((MODE) == XFmode \
+ ? (TARGET_64BIT ? 2 : 3) \
+ : (MODE) == XCmode \
+ ? (TARGET_64BIT ? 4 : 6) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
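+
+/* Worked examples of the computation above: in 32-bit mode
+ (UNITS_PER_WORD == 4), DImode in general registers needs
+ (8 + 4 - 1) / 4 == 2 hard regs and XFmode needs 3, while any
+ non-complex value held in an x87, SSE or MMX register occupies
+ exactly one hard register. */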
+
+#define HARD_REGNO_NREGS_HAS_PADDING(REGNO, MODE) \
+ ((TARGET_128BIT_LONG_DOUBLE && !TARGET_64BIT) \
+ ? (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ ? 0 \
+ : ((MODE) == XFmode || (MODE) == XCmode)) \
+ : 0)
+
+#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
+
+#define VALID_AVX256_REG_MODE(MODE) \
+ ((MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
+ || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
+#define VALID_SSE2_REG_MODE(MODE) \
+ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
+ || (MODE) == V2DImode || (MODE) == DFmode)
+
+#define VALID_SSE_REG_MODE(MODE) \
+ ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \
+ || (MODE) == SFmode || (MODE) == TFmode)
+
+#define VALID_MMX_REG_MODE_3DNOW(MODE) \
+ ((MODE) == V2SFmode || (MODE) == SFmode)
+
+#define VALID_MMX_REG_MODE(MODE) \
+ ((MODE == V1DImode) || (MODE) == DImode \
+ || (MODE) == V2SImode || (MODE) == SImode \
+ || (MODE) == V4HImode || (MODE) == V8QImode)
+
+/* ??? No autovectorization into MMX or 3DNOW until we can reliably
+ place emms and femms instructions.
+ FIXME: AVX has 32byte floating point vector operations and 16byte
+ integer vector operations. But vectorizer doesn't support
+ different sizes for integer and floating point vectors. We limit
+ vector size to 16byte. */
+#define UNITS_PER_SIMD_WORD(MODE) \
+ (TARGET_AVX ? (((MODE) == DFmode || (MODE) == SFmode) ? 16 : 16) \
+ : (TARGET_SSE ? 16 : UNITS_PER_WORD))
+
+#define VALID_DFP_MODE_P(MODE) \
+ ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
+
+#define VALID_FP_MODE_P(MODE) \
+ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
+ || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \
+
+#define VALID_INT_MODE_P(MODE) \
+ ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode \
+ || (MODE) == DImode \
+ || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode \
+ || (MODE) == CDImode \
+ || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode \
+ || (MODE) == TFmode || (MODE) == TCmode)))
+
+/* Return true for modes passed in SSE registers. */
+#define SSE_REG_MODE_P(MODE) \
+ ((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \
+ || (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \
+ || (MODE) == V4SFmode || (MODE) == V4SImode || (MODE) == V32QImode \
+ || (MODE) == V16HImode || (MODE) == V8SImode || (MODE) == V4DImode \
+ || (MODE) == V8SFmode || (MODE) == V4DFmode)
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ ix86_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output. */
+
+#define MODES_TIEABLE_P(MODE1, MODE2) ix86_modes_tieable_p (MODE1, MODE2)
+
+/* It is possible to write patterns to move flags; but until someone
+ does it, we avoid CCmode copies. */
+#define AVOID_CCMODE_COPIES
+
+/* Specify the modes required to caller save a given hard regno.
+ We do this on i386 to prevent flags from being saved at all.
+
+ Kill any attempts to combine saving of modes. */
+
+#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
+ (CC_REGNO_P (REGNO) ? VOIDmode \
+ : (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode \
+ : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false) \
+ : (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode \
+ : (MODE) == QImode && (REGNO) > BX_REG && !TARGET_64BIT ? SImode \
+ : (MODE))
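+
+/* For illustration: the flags register (CC_REGNO_P) gets VOIDmode
+ above and is therefore never saved across calls; a QImode value in a
+ register above BX_REG (e.g. %esi) on a 32-bit target is saved in
+ SImode instead, since those registers have no byte accesses. */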
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* On the 386 the pc register is %eip, and is not usable as a general
+ register. The ordinary mov instructions won't work. */
+/* #define PC_REGNUM */
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM 7
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM 6
+
+/* Base register for access to local variables of the function. */
+#define FRAME_POINTER_REGNUM 20
+
+/* First floating point reg */
+#define FIRST_FLOAT_REG 8
+
+/* First & last stack-like regs */
+#define FIRST_STACK_REG FIRST_FLOAT_REG
+#define LAST_STACK_REG (FIRST_FLOAT_REG + 7)
+
+#define FIRST_SSE_REG (FRAME_POINTER_REGNUM + 1)
+#define LAST_SSE_REG (FIRST_SSE_REG + 7)
+
+#define FIRST_MMX_REG (LAST_SSE_REG + 1)
+#define LAST_MMX_REG (FIRST_MMX_REG + 7)
+
+#define FIRST_REX_INT_REG (LAST_MMX_REG + 1)
+#define LAST_REX_INT_REG (FIRST_REX_INT_REG + 7)
+
+#define FIRST_REX_SSE_REG (LAST_REX_INT_REG + 1)
+#define LAST_REX_SSE_REG (FIRST_REX_SSE_REG + 7)
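+
+/* For reference, the hard register numbering that results from the
+   definitions above is:
+     0-7    eax edx ecx ebx esi edi ebp esp
+     8-15   st(0)-st(7)
+     16-20  argp, flags, fpsr, fpcr, frame
+     21-28  xmm0-xmm7
+     29-36  mm0-mm7
+     37-44  r8-r15
+     45-52  xmm8-xmm15
+   (see HI_REGISTER_NAMES below).  */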
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms
+ may be accessed via the stack pointer) in functions that seem suitable.
+ This is computed in `reload', in reload1.c. */
+#define FRAME_POINTER_REQUIRED ix86_frame_pointer_required ()
+
+/* Override this in other tm.h files to cope with various OS lossage
+ requiring a frame pointer. */
+#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+#endif
+
+/* Make sure we can access arbitrary call frames. */
+#define SETUP_FRAME_ADDRESSES() ix86_setup_frame_addresses ()
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM 16
+
+/* Register in which static-chain is passed to a function.
+   We use ECX as the static chain register for the 32-bit ABI.  In the
+   64-bit ABI, ECX is an argument register, so we use R10 instead.  */
+#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? R10_REG : CX_REG)
+
+/* Register to hold the addressing base for position independent
+   code access to data items.  We don't use a PIC pointer in 64-bit
+   mode.  Define the regnum as a dummy value to prevent gcc from
+   pessimizing code dealing with EBX.
+
+   To avoid clobbering a call-saved register unnecessarily, we renumber
+   the pic register when possible.  The change is visible after the
+   prologue has been emitted.  */
+
+#define REAL_PIC_OFFSET_TABLE_REGNUM BX_REG
+
+#define PIC_OFFSET_TABLE_REGNUM \
+ ((TARGET_64BIT && ix86_cmodel == CM_SMALL_PIC) \
+ || !flag_pic ? INVALID_REGNUM \
+ : reload_completed ? REGNO (pic_offset_table_rtx) \
+ : REAL_PIC_OFFSET_TABLE_REGNUM)
+
+#define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_"
+
+/* This is overridden by <cygwin.h>. */
+#define MS_AGGREGATE_RETURN 0
+
+/* This is overridden by <netware.h>. */
+#define KEEP_AGGREGATE_RETURN_POINTER 0
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union.
+
+ It might seem that class BREG is unnecessary, since no useful 386
+ opcode needs reg %ebx. But some systems pass args to the OS in ebx,
+ and the "b" register constraint is useful in asms for syscalls.
+
+ The flags, fpsr and fpcr registers are in no class. */
+
+enum reg_class
+{
+ NO_REGS,
+ AREG, DREG, CREG, BREG, SIREG, DIREG,
+ AD_REGS, /* %eax/%edx for DImode */
+ Q_REGS, /* %eax %ebx %ecx %edx */
+ NON_Q_REGS, /* %esi %edi %ebp %esp */
+ INDEX_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp */
+ LEGACY_REGS, /* %eax %ebx %ecx %edx %esi %edi %ebp %esp */
+  GENERAL_REGS,			/* %eax %ebx %ecx %edx %esi %edi %ebp %esp %r8 - %r15 */
+ FP_TOP_REG, FP_SECOND_REG, /* %st(0) %st(1) */
+ FLOAT_REGS,
+ SSE_FIRST_REG,
+ SSE_REGS,
+ MMX_REGS,
+ FP_TOP_SSE_REGS,
+ FP_SECOND_SSE_REGS,
+ FLOAT_SSE_REGS,
+ FLOAT_INT_REGS,
+ INT_SSE_REGS,
+ FLOAT_INT_SSE_REGS,
+ ALL_REGS, LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES ((int) LIM_REG_CLASSES)
+
+#define INTEGER_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), GENERAL_REGS)
+#define FLOAT_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), FLOAT_REGS)
+#define SSE_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), SSE_REGS)
+#define MMX_CLASS_P(CLASS) \
+ ((CLASS) == MMX_REGS)
+#define MAYBE_INTEGER_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), GENERAL_REGS)
+#define MAYBE_FLOAT_CLASS_P(CLASS) \
+ reg_classes_intersect_p ((CLASS), FLOAT_REGS)
+#define MAYBE_SSE_CLASS_P(CLASS) \
+ reg_classes_intersect_p (SSE_REGS, (CLASS))
+#define MAYBE_MMX_CLASS_P(CLASS) \
+ reg_classes_intersect_p (MMX_REGS, (CLASS))
+
+#define Q_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), Q_REGS)
+
+/* Give names of register classes as strings for dump file. */
+
+#define REG_CLASS_NAMES \
+{ "NO_REGS", \
+ "AREG", "DREG", "CREG", "BREG", \
+ "SIREG", "DIREG", \
+ "AD_REGS", \
+ "Q_REGS", "NON_Q_REGS", \
+ "INDEX_REGS", \
+ "LEGACY_REGS", \
+ "GENERAL_REGS", \
+ "FP_TOP_REG", "FP_SECOND_REG", \
+ "FLOAT_REGS", \
+ "SSE_FIRST_REG", \
+ "SSE_REGS", \
+ "MMX_REGS", \
+ "FP_TOP_SSE_REGS", \
+ "FP_SECOND_SSE_REGS", \
+ "FLOAT_SSE_REGS", \
+ "FLOAT_INT_REGS", \
+ "INT_SSE_REGS", \
+ "FLOAT_INT_SSE_REGS", \
+ "ALL_REGS" }
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+
+#define REG_CLASS_CONTENTS \
+{ { 0x00, 0x0 }, \
+ { 0x01, 0x0 }, { 0x02, 0x0 }, /* AREG, DREG */ \
+ { 0x04, 0x0 }, { 0x08, 0x0 }, /* CREG, BREG */ \
+ { 0x10, 0x0 }, { 0x20, 0x0 }, /* SIREG, DIREG */ \
+ { 0x03, 0x0 }, /* AD_REGS */ \
+ { 0x0f, 0x0 }, /* Q_REGS */ \
+ { 0x1100f0, 0x1fe0 }, /* NON_Q_REGS */ \
+ { 0x7f, 0x1fe0 }, /* INDEX_REGS */ \
+ { 0x1100ff, 0x0 }, /* LEGACY_REGS */ \
+ { 0x1100ff, 0x1fe0 }, /* GENERAL_REGS */ \
+ { 0x100, 0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
+ { 0xff00, 0x0 }, /* FLOAT_REGS */ \
+ { 0x200000, 0x0 }, /* SSE_FIRST_REG */ \
+  { 0x1fe00000,0x1fe000 },	/* SSE_REGS */			\
+  { 0xe0000000, 0x1f },	/* MMX_REGS */			\
+  { 0x1fe00100,0x1fe000 },	/* FP_TOP_SSE_REGS */		\
+  { 0x1fe00200,0x1fe000 },	/* FP_SECOND_SSE_REGS */	\
+  { 0x1fe0ff00,0x3fe000 },	/* FLOAT_SSE_REGS */		\
+  { 0x1ffff, 0x1fe0 },		/* FLOAT_INT_REGS */		\
+  { 0x1fe100ff,0x1fffe0 },	/* INT_SSE_REGS */		\
+  { 0x1fe1ffff,0x1fffe0 },	/* FLOAT_INT_SSE_REGS */	\
+  { 0xffffffff,0x1fffff }	/* ALL_REGS */			\
+}
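+
+/* For example, AD_REGS is { 0x03, 0x0 }: bits 0 and 1 of the first
+   word select hard regs 0 (%eax) and 1 (%edx).  FLOAT_REGS is
+   { 0xff00, 0x0 }: bits 8-15 select the eight x87 stack registers.
+   The second word covers hard regs 32 and up, so the 0x1fe0 in
+   GENERAL_REGS selects regs 37-44, i.e. %r8-%r15.  */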
+
+/* The following macro defines cover classes for the Integrated
+   Register Allocator.  Cover classes are a set of non-intersecting
+   register classes covering all hard registers used for register
+   allocation purposes.  Any move between two registers in a cover
+   class should be cheaper than a load or store of the registers.  The
+   macro value is an array of register classes with LIM_REG_CLASSES
+   used as the end marker.  */
+
+#define IRA_COVER_CLASSES \
+{ \
+ GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES \
+}
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+
+#define REGNO_REG_CLASS(REGNO) (regclass_map[REGNO])
+
+/* When defined, the compiler allows registers explicitly used in the
+ rtl to be used as spill registers but prevents the compiler from
+ extending the lifetime of these registers. */
+
+#define SMALL_REGISTER_CLASSES 1
+
+#define QI_REG_P(X) (REG_P (X) && REGNO (X) <= BX_REG)
+
+#define GENERAL_REGNO_P(N) \
+ ((N) <= STACK_POINTER_REGNUM || REX_INT_REGNO_P (N))
+
+#define GENERAL_REG_P(X) \
+ (REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
+
+#define ANY_QI_REG_P(X) (TARGET_64BIT ? GENERAL_REG_P (X) : QI_REG_P (X))
+
+#define REX_INT_REGNO_P(N) \
+ IN_RANGE ((N), FIRST_REX_INT_REG, LAST_REX_INT_REG)
+#define REX_INT_REG_P(X) (REG_P (X) && REX_INT_REGNO_P (REGNO (X)))
+
+#define FP_REG_P(X) (REG_P (X) && FP_REGNO_P (REGNO (X)))
+#define FP_REGNO_P(N) IN_RANGE ((N), FIRST_STACK_REG, LAST_STACK_REG)
+#define ANY_FP_REG_P(X) (REG_P (X) && ANY_FP_REGNO_P (REGNO (X)))
+#define ANY_FP_REGNO_P(N) (FP_REGNO_P (N) || SSE_REGNO_P (N))
+
+#define X87_FLOAT_MODE_P(MODE) \
+ (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
+
+#define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
+#define SSE_REGNO_P(N) \
+ (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG) \
+ || REX_SSE_REGNO_P (N))
+
+#define REX_SSE_REGNO_P(N) \
+ IN_RANGE ((N), FIRST_REX_SSE_REG, LAST_REX_SSE_REG)
+
+#define SSE_REGNO(N) \
+ ((N) < 8 ? FIRST_SSE_REG + (N) : FIRST_REX_SSE_REG + (N) - 8)
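+
+/* For example, SSE_REGNO (1) is FIRST_SSE_REG + 1, i.e. hard reg 22
+   (%xmm1), while SSE_REGNO (8) is FIRST_REX_SSE_REG, i.e. hard reg 45
+   (%xmm8), since the REX SSE registers are not contiguous with the
+   first eight in the hard register numbering.  */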
+
+#define SSE_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
+
+#define SSE_VEC_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+
+#define AVX_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode))
+
+#define AVX128_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode))
+
+#define AVX256_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode))
+
+#define AVX_VEC_FLOAT_MODE_P(MODE) \
+ (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode \
+ || (MODE) == V8SFmode || (MODE) == V4DFmode))
+
+#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
+#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
+
+#define STACK_REG_P(XOP) (REG_P (XOP) && STACK_REGNO_P (REGNO (XOP)))
+#define STACK_REGNO_P(N) IN_RANGE ((N), FIRST_STACK_REG, LAST_STACK_REG)
+
+#define STACK_TOP_P(XOP) (REG_P (XOP) && REGNO (XOP) == FIRST_STACK_REG)
+
+#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
+#define CC_REGNO_P(X) ((X) == FLAGS_REG || (X) == FPSR_REG)
+
+/* The class value for index registers, and the one for base regs. */
+
+#define INDEX_REG_CLASS INDEX_REGS
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Place additional restrictions on the register class to use when it
+ is necessary to be able to hold a value of mode MODE in a reload
+ register for which class CLASS would ordinarily be used. */
+
+#define LIMIT_RELOAD_CLASS(MODE, CLASS) \
+ ((MODE) == QImode && !TARGET_64BIT \
+ && ((CLASS) == ALL_REGS || (CLASS) == GENERAL_REGS \
+ || (CLASS) == LEGACY_REGS || (CLASS) == INDEX_REGS) \
+ ? Q_REGS : (CLASS))
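+
+/* For example, reloading a QImode value in 32-bit mode narrows
+   GENERAL_REGS down to Q_REGS, because only %eax, %ebx, %ecx and %edx
+   have byte subregisters there; %esi and %edi do not (their byte
+   forms %sil and %dil require a REX prefix, and hence 64-bit mode).  */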
+
+/* Given an rtx X being reloaded into a reg required to be
+ in class CLASS, return the class of reg to actually use.
+ In general this is just CLASS; but on some machines
+ in some cases it is preferable to use a more restrictive class.
+ On the 80386 series, we prevent floating constants from being
+ reloaded into floating registers (since no move-insn can do that)
+ and we ensure that QImodes aren't reloaded into the esi or edi reg. */
+
+/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
+ QImode must go into class Q_REGS.
+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
+ movdf to do mem-to-mem moves through integer regs. */
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ ix86_preferred_reload_class ((X), (CLASS))
+
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+
+#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \
+ ix86_preferred_output_reload_class ((X), (CLASS))
+
+/* If we are copying between general and FP registers, we need a memory
+ location. The same is true for SSE and MMX registers. */
+#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
+ ix86_secondary_memory_needed ((CLASS1), (CLASS2), (MODE), 1)
+
+/* Get_secondary_mem widens integral modes to BITS_PER_WORD.
+   There is no need to emit a full 64-bit move on 64-bit targets
+   for integral modes that can be moved using a 32-bit move.  */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
+ (GET_MODE_BITSIZE (MODE) < 32 && INTEGRAL_MODE_P (MODE) \
+ ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ : MODE)
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS. */
+/* On the 80386, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (!MAYBE_INTEGER_CLASS_P (CLASS) \
+ ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
+ : (((((MODE) == XFmode ? 12 : GET_MODE_SIZE (MODE))) \
+ + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+
+/* A C expression whose value is nonzero if pseudos that have been
+ assigned to registers of class CLASS would likely be spilled
+ because registers of CLASS are needed for spill registers.
+
+ The default value of this macro returns 1 if CLASS has exactly one
+ register and zero otherwise. On most machines, this default
+ should be used. Only define this macro to some other expression
+   if pseudos allocated by `local-alloc.c' end up in memory because
+ their hard registers were needed for spill registers. If this
+ macro returns nonzero for those classes, those pseudos will only
+ be allocated by `global.c', which knows how to reallocate the
+ pseudo to another register. If there would not be another
+ register available for reallocation, you should not change the
+ definition of this macro since the only effect of such a
+ definition would be to slow down register allocation. */
+
+#define CLASS_LIKELY_SPILLED_P(CLASS) \
+ (((CLASS) == AREG) \
+ || ((CLASS) == DREG) \
+ || ((CLASS) == CREG) \
+ || ((CLASS) == BREG) \
+ || ((CLASS) == AD_REGS) \
+ || ((CLASS) == SIREG) \
+ || ((CLASS) == DIREG) \
+ || ((CLASS) == FP_TOP_REG) \
+ || ((CLASS) == FP_SECOND_REG))
+
+/* Return a class of registers that cannot change FROM mode to TO mode. */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ ix86_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this to nonzero if the nominal address of the stack frame
+ is at the high-address end of the local variables;
+ that is, each additional local variable allocated
+ goes at a more negative offset in the frame. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset within stack frame to start allocating local variables at.
+ If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
+ first local allocated. Otherwise, it is the offset to the BEGINNING
+ of the first local allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+   this says how many the stack pointer really advances by.
+   On the 386, we have a pushw instruction that decrements by exactly 2
+   no matter what the position was; there is no pushb.
+   But as the CIE data alignment factor on this arch is -4, we need to
+   make sure all stack pointer adjustments are in multiples of 4.
+
+   For the 64-bit ABI we round up to 8 bytes.
+ */
+
+#define PUSH_ROUNDING(BYTES) \
+ (TARGET_64BIT \
+ ? (((BYTES) + 7) & (-8)) \
+ : (((BYTES) + 3) & (-4)))
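+
+/* For example, PUSH_ROUNDING (1) is (1 + 3) & -4 = 4 on 32-bit
+   targets and (1 + 7) & -8 = 8 on 64-bit targets, so pushing a
+   single byte still keeps the stack pointer suitably aligned.  */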
+
+/* If defined, the maximum amount of space required for outgoing arguments will
+ be computed and placed into the variable
+ `crtl->outgoing_args_size'. No space will be pushed onto the
+ stack for each call; instead, the function prologue should increase the stack
+ frame size by this amount.
+
+   The MS ABI seems to require 16-byte alignment everywhere except for
+   the function prologue and epilogue.  This is not possible without
+   ACCUMULATE_OUTGOING_ARGS.  */
+
+#define ACCUMULATE_OUTGOING_ARGS \
+ (TARGET_ACCUMULATE_OUTGOING_ARGS || ix86_cfun_abi () == MS_ABI)
+
+/* If defined, a C expression whose value is nonzero when we want to use PUSH
+ instructions to pass outgoing arguments. */
+
+#define PUSH_ARGS (TARGET_PUSH_ARGS && !ACCUMULATE_OUTGOING_ARGS)
+
+/* We want the stack and args to grow in opposite directions, even if
+   PUSH_ARGS is 0.  */
+#define PUSH_ARGS_REVERSED 1
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Define this macro if functions should assume that stack space has been
+ allocated for arguments even when their values are passed in registers.
+
+ The value of this macro is the size, in bytes, of the area reserved for
+ arguments passed in registers for the function represented by FNDECL.
+
+ This space can be allocated by the caller, or be a part of the
+ machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' says
+ which. */
+#define REG_PARM_STACK_SPACE(FNDECL) ix86_reg_parm_stack_space (FNDECL)
+
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) \
+ (ix86_function_type_abi (FNTYPE) == MS_ABI)
+
+/* Value is the number of bytes of arguments automatically
+ popped when returning from a subroutine call.
+ FUNDECL is the declaration node of the function (as a tree),
+ FUNTYPE is the data type of the function (as a tree),
+ or for a library call it is an identifier node for the subroutine name.
+ SIZE is the number of bytes of arguments passed on the stack.
+
+ On the 80386, the RTD insn may be used to pop them if the number
+ of args is fixed, but if the number is variable then the caller
+ must pop them all. RTD can't be used for library calls now
+ because the library is compiled with the Unix compiler.
+ Use of RTD is a selectable option, since it is incompatible with
+ standard Unix calling sequences. If the option is not selected,
+ the caller must always pop the args.
+
+ The attribute stdcall is equivalent to RTD on a per module basis. */
+
+#define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \
+ ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE))
+
+#define FUNCTION_VALUE_REGNO_P(N) ix86_function_value_regno_p (N)
+
+/* Define how to find the value returned by a library function
+ assuming the value has mode MODE. */
+
+#define LIBCALL_VALUE(MODE) ix86_libcall_value (MODE)
+
+/* Define the size of the result block used for communication between
+ untyped_call and untyped_return. The block contains a DImode value
+ followed by the block used by fnsave and frstor. */
+
+#define APPLY_RESULT_SIZE (8+108)
+
+/* 1 if N is a possible register number for function argument passing. */
+#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N)
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go. */
+
+typedef struct ix86_args {
+ int words; /* # words passed so far */
+ int nregs; /* # registers available for passing */
+ int regno; /* next available register number */
+ int fastcall; /* fastcall calling convention is used */
+ int sse_words; /* # sse words passed so far */
+ int sse_nregs; /* # sse registers available for passing */
+ int warn_avx; /* True when we want to warn about AVX ABI. */
+ int warn_sse; /* True when we want to warn about SSE ABI. */
+ int warn_mmx; /* True when we want to warn about MMX ABI. */
+ int sse_regno; /* next available sse register number */
+ int mmx_words; /* # mmx words passed so far */
+ int mmx_nregs; /* # mmx registers available for passing */
+ int mmx_regno; /* next available mmx register number */
+  int maybe_vaarg;		/* true for calls to possibly variadic functions.  */
+ int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should
+ be passed in SSE registers. Otherwise 0. */
+  int call_abi;			/* Set to SYSV_ABI for the sysv ABI,
+				   otherwise MS_ABI for the ms ABI.  */
+} CUMULATIVE_ARGS;
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0. */
+
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ init_cumulative_args (&(CUM), (FNTYPE), (LIBNAME), (FNDECL))
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be available.) */
+
+#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+ function_arg_advance (&(CUM), (MODE), (TYPE), (NAMED))
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis). */
+
+#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
+ function_arg (&(CUM), (MODE), (TYPE), (NAMED))
+
+#define TARGET_ASM_FILE_END ix86_file_end
+#define NEED_INDICATE_EXEC_STACK 0
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry. */
+
+#define FUNCTION_PROFILER(FILE, LABELNO) x86_function_profiler (FILE, LABELNO)
+
+#define MCOUNT_NAME "_mcount"
+
+#define PROFILE_COUNT_REGISTER "edx"
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+/* Note that on the 386 it might be more efficient not to define this,
+   since we have to restore it ourselves from the frame pointer in
+   order to use pop.  */
+
+#define EXIT_IGNORE_STACK 1
+
+/* Output assembler code for a block containing the constant parts
+ of a trampoline, leaving space for the variable parts. */
+
+/* On the 386, the trampoline contains two instructions:
+ mov #STATIC,ecx
+ jmp FUNCTION
+ The trampoline is generated entirely at runtime. The operand of JMP
+ is the address of FUNCTION relative to the instruction following the
+ JMP (which is 5 bytes long). */
+
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 23 : 10)
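+
+/* The 32-bit figure follows from the layout above: the mov
+   $STATIC,%ecx encoding is 5 bytes (opcode plus imm32) and the
+   relative jmp another 5, giving 10 bytes.  The 64-bit trampoline
+   needs more room because the addresses may not fit in 32 bits; in
+   the worst case it uses two 10-byte movabs instructions plus a
+   3-byte indirect jump, giving 23 bytes.  */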
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+
+#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
+ x86_initialize_trampoline ((TRAMP), (FNADDR), (CXT))
+
+/* Definitions for register eliminations.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+   There are two registers that can always be eliminated on the i386.
+   The frame pointer and the arg pointer can be replaced by either the
+   hard frame pointer or the stack pointer, depending upon the
+   circumstances.  The hard frame pointer is not used before reload and
+   so it is not eligible for elimination.  */
+
+#define ELIMINABLE_REGS \
+{{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}}
+
+/* Given FROM and TO register numbers, say whether this elimination is
+ allowed. */
+
+#define CAN_ELIMINATE(FROM, TO) ix86_can_eliminate ((FROM), (TO))
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ ((OFFSET) = ix86_initial_elimination_offset ((FROM), (TO)))
+
+/* Addressing modes, and classification of registers for them. */
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in local-alloc.c. */
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ ((REGNO) < STACK_POINTER_REGNUM \
+ || REX_INT_REGNO_P (REGNO) \
+ || (unsigned) reg_renumber[(REGNO)] < STACK_POINTER_REGNUM \
+ || REX_INT_REGNO_P ((unsigned) reg_renumber[(REGNO)]))
+
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GENERAL_REGNO_P (REGNO) \
+ || (REGNO) == ARG_POINTER_REGNUM \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || GENERAL_REGNO_P ((unsigned) reg_renumber[(REGNO)]))
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ We have two alternate definitions for each of them.
+ The usual definition accepts all pseudo regs; the other rejects
+ them unless they have been allocated suitable hard regs.
+ The symbol REG_OK_STRICT causes the latter definition to be used.
+
+ Most source files want to accept pseudo regs in the hope that
+ they will get allocated to the class that the insn wants them to be in.
+ Source files for reload pass need to be strict.
+ After reload, it makes no difference, since pseudo regs have
+ been eliminated by then. */
+
+
+/* Non strict versions, pseudos are ok. */
+#define REG_OK_FOR_INDEX_NONSTRICT_P(X) \
+ (REGNO (X) < STACK_POINTER_REGNUM \
+ || REX_INT_REGNO_P (REGNO (X)) \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+#define REG_OK_FOR_BASE_NONSTRICT_P(X) \
+ (GENERAL_REGNO_P (REGNO (X)) \
+ || REGNO (X) == ARG_POINTER_REGNUM \
+ || REGNO (X) == FRAME_POINTER_REGNUM \
+ || REGNO (X) >= FIRST_PSEUDO_REGISTER)
+
+/* Strict versions, hard registers only */
+#define REG_OK_FOR_INDEX_STRICT_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
+#define REG_OK_FOR_BASE_STRICT_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
+
+#ifndef REG_OK_STRICT
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_NONSTRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_NONSTRICT_P (X)
+
+#else
+#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_INDEX_STRICT_P (X)
+#define REG_OK_FOR_BASE_P(X) REG_OK_FOR_BASE_STRICT_P (X)
+#endif
+
+/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression
+ that is a valid memory address for an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+
+ The other macros defined here are used only in GO_IF_LEGITIMATE_ADDRESS,
+ except for CONSTANT_ADDRESS_P which is usually machine-independent.
+
+ See legitimize_pic_address in i386.c for details as to what
+ constitutes a legitimate address when -fpic is used. */
+
+#define MAX_REGS_PER_ADDRESS 2
+
+#define CONSTANT_ADDRESS_P(X) constant_address_p (X)
+
+/* Nonzero if the constant value X is a legitimate general operand.
+ It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_CONSTANT_P(X) legitimate_constant_p (X)
+
+#ifdef REG_OK_STRICT
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+do { \
+ if (legitimate_address_p ((MODE), (X), 1)) \
+ goto ADDR; \
+} while (0)
+
+#else
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+do { \
+ if (legitimate_address_p ((MODE), (X), 0)) \
+ goto ADDR; \
+} while (0)
+
+#endif
+
+/* If defined, a C expression to determine the base term of address X.
+ This macro is used in only one place: `find_base_term' in alias.c.
+
+ It is always safe for this macro to not be defined. It exists so
+ that alias analysis can understand machine-dependent addresses.
+
+ The typical use of this macro is to handle addresses containing
+ a label_ref or symbol_ref within an UNSPEC. */
+
+#define FIND_BASE_TERM(X) ix86_find_base_term (X)
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output.
+
+ For the 80386, we handle X+REG by loading X into a register R and
+ using R+REG. R will go in a general reg and indexing will be used.
+ However, if REG is a broken-out memory address or multiplication,
+ nothing needs to be done because REG can certainly go in a general reg.
+
+ When -fpic is used, special handling is needed for symbolic references.
+ See comments by legitimize_pic_address in i386.c for details. */
+
+#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) \
+do { \
+ (X) = legitimize_address ((X), (OLDX), (MODE)); \
+ if (memory_address_p ((MODE), (X))) \
+ goto WIN; \
+} while (0)
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+
+#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
+
+#define SYMBOLIC_CONST(X) \
+ (GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+   has an effect that depends on the machine mode it is used for.
+   On the 80386, only postdecrement and postincrement addresses depend
+   thus (the amount of decrement or increment being the length of the
+   operand).  These are now caught in recog.c.  */
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL)
+
+/* Max number of args passed in registers.  If this is more than 3, we will
+   have problems with ebx (register #4), since it is a call-saved register and
+   is also used as the pic register in ELF.  So for now, don't allow more than
+   3 registers to be passed in registers.  */
+
+/* ABI-specific values for REGPARM_MAX and SSE_REGPARM_MAX.  */
+#define X86_64_REGPARM_MAX 6
+#define X64_REGPARM_MAX 4
+#define X86_32_REGPARM_MAX 3
+
+#define X86_64_SSE_REGPARM_MAX 8
+#define X64_SSE_REGPARM_MAX 4
+#define X86_32_SSE_REGPARM_MAX (TARGET_SSE ? 3 : 0)
+
+#define REGPARM_MAX \
+ (TARGET_64BIT ? (TARGET_64BIT_MS_ABI ? X64_REGPARM_MAX \
+ : X86_64_REGPARM_MAX) \
+ : X86_32_REGPARM_MAX)
+
+#define SSE_REGPARM_MAX \
+ (TARGET_64BIT ? (TARGET_64BIT_MS_ABI ? X64_SSE_REGPARM_MAX \
+ : X86_64_SSE_REGPARM_MAX) \
+ : X86_32_SSE_REGPARM_MAX)
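+
+/* These limits mirror the calling conventions: the x86-64 SysV ABI
+   passes up to six integer arguments (%rdi, %rsi, %rdx, %rcx, %r8,
+   %r9) and up to eight floating-point arguments (%xmm0-%xmm7), the
+   Microsoft x64 ABI passes four of each, and 32-bit regparm uses at
+   most %eax, %edx and %ecx.  */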
+
+#define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
+
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE \
+ (!TARGET_64BIT || (flag_pic && ix86_cmodel != CM_LARGE_PIC) ? SImode : DImode)
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX 16
+
+/* MOVE_MAX_PIECES is the number of bytes at a time which we can
+ move efficiently, as opposed to MOVE_MAX which is the maximum
+ number of bytes we can move with a single instruction. */
+#define MOVE_MAX_PIECES (TARGET_64BIT ? 8 : 4)
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead.
+ Increasing the value will always make code faster, but eventually
+   incurs a high cost in increased code size.
+
+ If you don't define this, a reasonable default is used. */
+
+#define MOVE_RATIO(speed) ((speed) ? ix86_cost->move_ratio : 3)
+
+/* If a clear memory operation would take CLEAR_RATIO or more simple
+ move-instruction sequences, we will do a clrmem or libcall instead. */
+
+#define CLEAR_RATIO(speed) ((speed) ? MIN (6, ix86_cost->move_ratio) : 2)
+
+/* Define if shifts truncate the shift count
+ which implies one can omit a sign-extension or zero-extension
+ of a shift count. */
+/* On i386, shifts do truncate the count. But bit opcodes don't. */
+
+/* #define SHIFT_COUNT_TRUNCATED */
+
+/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
+ is done just by pretending it is already truncated. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+/* A macro to update M and UNSIGNEDP when an object whose type is
+ TYPE and which has the specified mode and signedness is to be
+ stored in a register. This macro is only called when TYPE is a
+ scalar type.
+
+   On i386 it is sometimes useful to promote HImode and QImode
+   quantities to SImode.  The choice depends on the target type.  */
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+do { \
+ if (((MODE) == HImode && TARGET_PROMOTE_HI_REGS) \
+ || ((MODE) == QImode && TARGET_PROMOTE_QI_REGS)) \
+ (MODE) = SImode; \
+} while (0)
+
+/* Specify the machine mode that pointers have.
+ After generation of rtl, the compiler makes no further distinction
+ between pointers and any other objects of this machine mode. */
+#define Pmode (TARGET_64BIT ? DImode : SImode)
+
+/* A function address in a call instruction
+ is a byte address (for indexing purposes)
+ so give the MEM rtx a byte's mode. */
+#define FUNCTION_MODE QImode
+
+/* A C expression for the cost of moving data from a register in class FROM to
+ one in class TO. The classes are expressed using the enumeration values
+ such as `GENERAL_REGS'. A value of 2 is the default; other values are
+ interpreted relative to that.
+
+ It is not required that the cost always equal 2 when FROM is the same as TO;
+ on some machines it is expensive to move between registers if they are not
+ general registers. */
+
+#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) \
+ ix86_register_move_cost ((MODE), (CLASS1), (CLASS2))
+
+/* A C expression for the cost of moving data of mode M between a
+ register and memory. A value of 2 is the default; this cost is
+ relative to those in `REGISTER_MOVE_COST'.
+
+ If moving between registers and memory is more expensive than
+ between two registers, you should define this macro to express the
+ relative cost. */
+
+#define MEMORY_MOVE_COST(MODE, CLASS, IN) \
+ ix86_memory_move_cost ((MODE), (CLASS), (IN))
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that. */
+
+#define BRANCH_COST(speed_p, predictable_p) \
+ (!(speed_p) ? 2 : (predictable_p) ? 0 : ix86_branch_cost)
+
+/* Define this macro as a C expression which is nonzero if accessing
+ less than a word of memory (i.e. a `char' or a `short') is no
+   faster than accessing a word of memory, i.e., if such accesses
+   require more than one instruction or if there is no difference in
+ cost between byte and (aligned) word loads.
+
+ When this macro is not defined, the compiler will access a field by
+ finding the smallest containing object; when it is defined, a
+ fullword load will be used if alignment permits. Unless bytes
+ accesses are faster than word accesses, using word accesses is
+ preferable since it may eliminate subsequent memory access if
+ subsequent accesses occur to other fields in the same word of the
+ structure, but to different bytes. */
+
+#define SLOW_BYTE_ACCESS 0
+
+/* Nonzero if access to memory by shorts is slow and undesirable. */
+#define SLOW_SHORT_ACCESS 0
+
+/* Define this macro to be the value 1 if unaligned accesses have a
+ cost many times greater than aligned accesses, for example if they
+ are emulated in a trap handler.
+
+ When this macro is nonzero, the compiler will act as if
+ `STRICT_ALIGNMENT' were nonzero when generating code for block
+ moves. This can cause significantly more instructions to be
+ produced. Therefore, do not set this macro nonzero if unaligned
+ accesses only add a cycle or two to the time for a memory access.
+
+ If the value of this macro is always zero, it need not be defined. */
+
+/* #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 0 */
+
+/* Define this macro if it is as good or better to call a constant
+ function address than to call an address kept in a register.
+
+ Desirable on the 386 because a CALL with a constant address is
+ faster than one with a register address. */
+
+#define NO_FUNCTION_CSE
+
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison.
+
+ For floating-point equality comparisons, CCFPEQmode should be used.
+ VOIDmode should be used in all other cases.
+
+ For integer comparisons against zero, reduce to CCNOmode or CCZmode if
+ possible, to allow for more combinations. */
+
+#define SELECT_CC_MODE(OP, X, Y) ix86_cc_mode ((OP), (X), (Y))
+
+/* Return nonzero if MODE implies a floating point inequality can be
+ reversed. */
+
+#define REVERSIBLE_CC_MODE(MODE) 1
+
+/* A C expression whose value is reversed condition code of the CODE for
+ comparison done in CC_MODE mode. */
+#define REVERSE_CONDITION(CODE, MODE) ix86_reverse_condition ((CODE), (MODE))
+
+
+/* Control the assembler format that we output, to the extent
+ this does not vary between assemblers. */
+
+/* How to refer to registers in assembler output.
+ This sequence is indexed by compiler's hard-register-number (see above). */
+
+/* In order to refer to the first 8 regs as 32-bit regs, prefix an "e".
+   For non-floating-point regs, the following are the HImode names.
+
+ For float regs, the stack top is sometimes referred to as "%st(0)"
+ instead of just "%st". PRINT_OPERAND handles this with the "y" code. */
+
+#define HI_REGISTER_NAMES \
+{"ax","dx","cx","bx","si","di","bp","sp", \
+ "st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)", \
+ "argp", "flags", "fpsr", "fpcr", "frame", \
+ "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7", \
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"}
+
+#define REGISTER_NAMES HI_REGISTER_NAMES
+
+/* Table of additional register names to use in user input. */
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ { "eax", 0 }, { "edx", 1 }, { "ecx", 2 }, { "ebx", 3 }, \
+ { "esi", 4 }, { "edi", 5 }, { "ebp", 6 }, { "esp", 7 }, \
+ { "rax", 0 }, { "rdx", 1 }, { "rcx", 2 }, { "rbx", 3 }, \
+ { "rsi", 4 }, { "rdi", 5 }, { "rbp", 6 }, { "rsp", 7 }, \
+ { "al", 0 }, { "dl", 1 }, { "cl", 2 }, { "bl", 3 }, \
+ { "ah", 0 }, { "dh", 1 }, { "ch", 2 }, { "bh", 3 } }
+
+/* Note we are omitting these since currently I don't know how
+   to get gcc to use them: they want the same register numbers as
+   al and ax, but under different names.  */
+
+#define QI_REGISTER_NAMES \
+{"al", "dl", "cl", "bl", "sil", "dil", "bpl", "spl",}
+
+/* These parallel the array above, and can be used to access bits 8:15
+ of regs 0 through 3. */
+
+#define QI_HIGH_REGISTER_NAMES \
+{"ah", "dh", "ch", "bh", }
+
+/* How to renumber registers for dbx and gdb. */
+
+#define DBX_REGISTER_NUMBER(N) \
+ (TARGET_64BIT ? dbx64_register_map[(N)] : dbx_register_map[(N)])
+
+extern int const dbx_register_map[FIRST_PSEUDO_REGISTER];
+extern int const dbx64_register_map[FIRST_PSEUDO_REGISTER];
+extern int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER];
+
+/* Before the prologue, RA is at 0(%esp). */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_MEM (VOIDmode, gen_rtx_REG (VOIDmode, STACK_POINTER_REGNUM))
+
+/* After the prologue, RA is at -4(AP) in the current frame. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ ((COUNT) == 0 \
+ ? gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, -UNITS_PER_WORD)) \
+ : gen_rtx_MEM (Pmode, plus_constant (FRAME, UNITS_PER_WORD)))
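+
+/* That is, for COUNT == 0 the return address sits one word below the
+   argument pointer (-4(AP) on 32-bit, -8 on 64-bit), while for an
+   outer frame it sits one word above FRAME, which points at the saved
+   frame pointer.  */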
+
+/* PC is dbx register 8; let's use that column for RA. */
+#define DWARF_FRAME_RETURN_COLUMN (TARGET_64BIT ? 16 : 8)
+
+/* Before the prologue, the top of the frame is at 4(%esp). */
+#define INCOMING_FRAME_SP_OFFSET UNITS_PER_WORD
+
+/* Describe how we implement __builtin_eh_return. */
+#define EH_RETURN_DATA_REGNO(N) ((N) < 2 ? (N) : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 2)
+
+
+/* Select a format to encode pointers in exception handling data. CODE
+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
+ true if the symbol may be affected by dynamic relocations.
+
+ ??? All x86 object file formats are capable of representing this.
+ After all, the relocation needed is the same as for the call insn.
+ Whether or not a particular assembler allows us to enter such, I
+ guess we'll have to see. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ asm_preferred_eh_data_format ((CODE), (GLOBAL))
+
+/* This is how to output an insn to push a register on the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_PUSH(FILE, REGNO) \
+do { \
+ if (TARGET_64BIT) \
+ asm_fprintf ((FILE), "\tpush{q}\t%%r%s\n", \
+ reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \
+ else \
+ asm_fprintf ((FILE), "\tpush{l}\t%%e%s\n", reg_names[(REGNO)]); \
+} while (0)
+
+/* This is how to output an insn to pop a register from the stack.
+ It need not be very fast code. */
+
+#define ASM_OUTPUT_REG_POP(FILE, REGNO) \
+do { \
+ if (TARGET_64BIT) \
+ asm_fprintf ((FILE), "\tpop{q}\t%%r%s\n", \
+ reg_names[(REGNO)] + (REX_INT_REGNO_P (REGNO) != 0)); \
+ else \
+ asm_fprintf ((FILE), "\tpop{l}\t%%e%s\n", reg_names[(REGNO)]); \
+} while (0)
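+
+/* In both macros, the expression reg_names[REGNO]
+   + (REX_INT_REGNO_P (REGNO) != 0) adjusts for the register naming
+   scheme: legacy names such as "ax" are printed whole after the
+   "%r"/"%e" prefix (giving "%rax"/"%eax"), while REX names such as
+   "r8" have their leading 'r' skipped so that "%r%s" yields "%r8"
+   rather than "%rr8".  */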
+
+/* This is how to output an element of a case-vector that is absolute. */
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+ ix86_output_addr_vec_elt ((FILE), (VALUE))
+
+/* This is how to output an element of a case-vector that is relative. */
+
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+ ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
+
+/* When we see %v, we will print the 'v' prefix if TARGET_AVX is
+ true. */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR) \
+{ \
+ if ((PTR)[0] == '%' && (PTR)[1] == 'v') \
+ { \
+ if (TARGET_AVX) \
+ (PTR) += 1; \
+ else \
+ (PTR) += 2; \
+ } \
+}
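+
+/* For example, an opcode written as "%vmulss" in the machine
+   description is emitted as "vmulss" when TARGET_AVX (the pointer
+   skips only the '%') and as "mulss" otherwise (the pointer skips
+   both the '%' and the 'v').  */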
+
+/* A C statement or statements which output an assembler instruction
+ opcode to the stdio stream STREAM. The macro-operand PTR is a
+ variable of type `char *' which points to the opcode name in
+ its "internal" form--the form that is written in the machine
+ description. */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+ ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
+/* Under some conditions we need jump tables in the text section,
+ because the assembler cannot handle label differences between
+ sections. This is the case for x86_64 on Mach-O for example. */
+
+#define JUMP_TABLES_IN_TEXT_SECTION \
+ (flag_pic && ((TARGET_MACHO && TARGET_64BIT) \
+ || (!TARGET_64BIT && !HAVE_AS_GOTOFF_IN_DATA)))
+
+/* Switch to init or fini section via SECTION_OP, emit a call to FUNC,
+ and switch back. For x86 we do this only to save a few bytes that
+ would otherwise be unused in the text section. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "call " USER_LABEL_PREFIX #FUNC "\n" \
+ TEXT_SECTION_ASM_OP);
+
+/* Print operand X (an rtx) in assembler syntax to file FILE.
+ CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
+ Effect of various CODE letters is described in i386.c near
+ print_operand function. */
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
+ ((CODE) == '*' || (CODE) == '+' || (CODE) == '&' || (CODE) == ';')
+
+#define PRINT_OPERAND(FILE, X, CODE) \
+ print_operand ((FILE), (X), (CODE))
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
+ print_operand_address ((FILE), (ADDR))
+
+#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \
+do { \
+ if (! output_addr_const_extra (FILE, (X))) \
+ goto FAIL; \
+} while (0);
+
+/* Which processor to schedule for. The cpu attribute defines a list that
+ mirrors this list, so changes to i386.md must be made at the same time. */
+
+enum processor_type
+{
+ PROCESSOR_I386 = 0, /* 80386 */
+ PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
+ PROCESSOR_PENTIUM,
+ PROCESSOR_PENTIUMPRO,
+ PROCESSOR_GEODE,
+ PROCESSOR_K6,
+ PROCESSOR_ATHLON,
+ PROCESSOR_PENTIUM4,
+ PROCESSOR_K8,
+ PROCESSOR_NOCONA,
+ PROCESSOR_CORE2,
+ PROCESSOR_GENERIC32,
+ PROCESSOR_GENERIC64,
+ PROCESSOR_AMDFAM10,
+ PROCESSOR_max
+};
+
+extern enum processor_type ix86_tune;
+extern enum processor_type ix86_arch;
+
+enum fpmath_unit
+{
+ FPMATH_387 = 1,
+ FPMATH_SSE = 2
+};
+
+extern enum fpmath_unit ix86_fpmath;
+
+enum tls_dialect
+{
+ TLS_DIALECT_GNU,
+ TLS_DIALECT_GNU2,
+ TLS_DIALECT_SUN
+};
+
+extern enum tls_dialect ix86_tls_dialect;
+
+enum cmodel {
+ CM_32, /* The traditional 32-bit ABI. */
+ CM_SMALL, /* Assumes all code and data fits in the low 31 bits. */
+ CM_KERNEL, /* Assumes all code and data fits in the high 31 bits. */
+ CM_MEDIUM, /* Assumes code fits in the low 31 bits; data unlimited. */
+ CM_LARGE, /* No assumptions. */
+ CM_SMALL_PIC, /* Assumes code+data+got/plt fits in a 31 bit region. */
+ CM_MEDIUM_PIC,/* Assumes code+got/plt fits in a 31 bit region. */
+ CM_LARGE_PIC /* No assumptions. */
+};
+
+extern enum cmodel ix86_cmodel;
+
+/* Size of the RED_ZONE area. */
+#define RED_ZONE_SIZE 128
+/* Reserved area of the red zone for temporaries. */
+#define RED_ZONE_RESERVE 8
+
+enum asm_dialect {
+ ASM_ATT,
+ ASM_INTEL
+};
+
+extern enum asm_dialect ix86_asm_dialect;
+extern unsigned int ix86_preferred_stack_boundary;
+extern unsigned int ix86_incoming_stack_boundary;
+extern int ix86_branch_cost, ix86_section_threshold;
+
+/* Smallest class containing REGNO. */
+extern enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER];
+
+extern rtx ix86_compare_op0; /* operand 0 for comparisons */
+extern rtx ix86_compare_op1; /* operand 1 for comparisons */
+extern rtx ix86_compare_emitted;
+
+/* To properly truncate FP values into integers, we need to set the
+   i387 control word.  We can't emit proper mode switching code before
+   reload, as spills generated by reload may truncate values
+   incorrectly, but we still can avoid redundant computation of the new
+   control word by the mode switching pass.
+   The fldcw instructions are still emitted redundantly, but this is
+   probably not going to be a noticeable problem, as most CPUs do have
+   a fast path for the sequence.
+
+   The machinery is to emit simple truncation instructions and split
+   them before reload to instructions having USEs of two memory
+   locations that are filled by this code to the old and new control
+   word.
+
+   A post-reload pass may later be used to eliminate the redundant
+   fldcw if needed.  */
+
+enum ix86_entity
+{
+ I387_TRUNC = 0,
+ I387_FLOOR,
+ I387_CEIL,
+ I387_MASK_PM,
+ MAX_386_ENTITIES
+};
+
+enum ix86_stack_slot
+{
+ SLOT_VIRTUAL = 0,
+ SLOT_TEMP,
+ SLOT_CW_STORED,
+ SLOT_CW_TRUNC,
+ SLOT_CW_FLOOR,
+ SLOT_CW_CEIL,
+ SLOT_CW_MASK_PM,
+ MAX_386_STACK_LOCALS
+};
+
+/* Define this macro if the port needs extra instructions inserted
+ for mode switching in an optimizing compilation. */
+
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
+ ix86_optimize_mode_switching[(ENTITY)]
+
+/* If you define `OPTIMIZE_MODE_SWITCHING', you have to define this as
+ initializer for an array of integers. Each initializer element N
+ refers to an entity that needs mode switching, and specifies the
+ number of different modes that might need to be set for this
+ entity. The position of the initializer in the initializer -
+ starting counting at zero - determines the integer that is used to
+ refer to the mode-switched entity in question. */
+
+#define NUM_MODES_FOR_MODE_SWITCHING \
+ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+
+/* ENTITY is an integer specifying a mode-switched entity. If
+ `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
+ return an integer value not larger than the corresponding element
+ in `NUM_MODES_FOR_MODE_SWITCHING', to denote the mode that ENTITY
+ must be switched into prior to the execution of INSN. */
+
+#define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
+
+/* This macro specifies the order in which modes for ENTITY are
+ processed. 0 is the highest priority. */
+
+#define MODE_PRIORITY_TO_MODE(ENTITY, N) (N)
+
+/* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
+ is the set of hard registers live at the point where the insn(s)
+ are to be inserted. */
+
+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+ ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
+ ? emit_i387_cw_initialization (MODE), 0 \
+ : 0)
+
+
+/* Avoid renaming of stack registers, as doing so in combination with
+   scheduling just increases the number of live registers at a time and
+   in turn the number of fxch instructions needed.
+
+   ??? Maybe Pentium chips benefit from renaming; someone can try....  */
+
+#define HARD_REGNO_RENAME_OK(SRC, TARGET) \
+ (! IN_RANGE ((SRC), FIRST_STACK_REG, LAST_STACK_REG))
+
+
+#define FASTCALL_PREFIX '@'
+
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS)
+struct machine_cfa_state GTY(())
+{
+ rtx reg;
+ HOST_WIDE_INT offset;
+};
+
+struct machine_function GTY(())
+{
+ struct stack_local_entry *stack_locals;
+ const char *some_ld_name;
+ int varargs_gpr_size;
+ int varargs_fpr_size;
+ int accesses_prev_frame;
+ int optimize_mode_switching[MAX_386_ENTITIES];
+ int needs_cld;
+ /* Set by ix86_compute_frame_layout and used by prologue/epilogue
+ expander to determine the style used. */
+ int use_fast_prologue_epilogue;
+ /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
+ for. */
+ int use_fast_prologue_epilogue_nregs;
+ /* If true, the current function needs the default PIC register, not
+ an alternate register (on x86) and must not use the red zone (on
+ x86_64), even if it's a leaf function. We don't want the
+ function to be regarded as non-leaf because TLS calls need not
+ affect register allocation. This flag is set when a TLS call
+ instruction is expanded within a function, and never reset, even
+ if all such instructions are optimized away. Use the
+ ix86_current_function_calls_tls_descriptor macro for a better
+ approximation. */
+ int tls_descriptor_call_expanded_p;
+  /* This value is used for amd64 targets and specifies the current ABI
+     to be used: MS_ABI for the ms ABI, otherwise SYSV_ABI for the sysv
+     ABI.  */
+ int call_abi;
+ struct machine_cfa_state cfa;
+};
+#endif
+
+#define ix86_stack_locals (cfun->machine->stack_locals)
+#define ix86_varargs_gpr_size (cfun->machine->varargs_gpr_size)
+#define ix86_varargs_fpr_size (cfun->machine->varargs_fpr_size)
+#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
+#define ix86_tls_descriptor_calls_expanded_in_cfun \
+ (cfun->machine->tls_descriptor_call_expanded_p)
+/* Since tls_descriptor_call_expanded is not cleared, even if all TLS
+   calls are optimized away, we try to detect cases in which it was
+   optimized away.  Since such instructions contain a (use (reg SP)),
+   we can verify whether there's any such instruction live by testing
+   that REG_SP is live.  */
+#define ix86_current_function_calls_tls_descriptor \
+ (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
+#define ix86_cfa_state (&cfun->machine->cfa)
+
+/* Control behavior of x86_file_start. */
+#define X86_FILE_START_VERSION_DIRECTIVE false
+#define X86_FILE_START_FLTUSED false
+
+/* Flag to mark data that is in the large address area. */
+#define SYMBOL_FLAG_FAR_ADDR (SYMBOL_FLAG_MACH_DEP << 0)
+#define SYMBOL_REF_FAR_ADDR_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_FAR_ADDR) != 0)
+
+/* Flags to mark dllimport/dllexport. Used by PE ports, but handy to
+ have defined always, to avoid ifdefing. */
+#define SYMBOL_FLAG_DLLIMPORT (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_REF_DLLIMPORT_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLIMPORT) != 0)
+
+#define SYMBOL_FLAG_DLLEXPORT (SYMBOL_FLAG_MACH_DEP << 2)
+#define SYMBOL_REF_DLLEXPORT_P(X) \
+ ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
+
+/* Model costs for vectorizer. */
+
+/* Cost of conditional branch. */
+#undef TARG_COND_BRANCH_COST
+#define TARG_COND_BRANCH_COST ix86_cost->branch_cost
+
+/* Enumerate the target-specific extra va_list types.  Please do not
+   iterate over the base va_list type name.  */
+#define TARGET_ENUM_VA_LIST(IDX, PNAME, PTYPE) \
+ (TARGET_64BIT ? ix86_enum_va_list (IDX, PNAME, PTYPE) : 0)
+
+/* Cost of any scalar operation, excluding load and store. */
+#undef TARG_SCALAR_STMT_COST
+#define TARG_SCALAR_STMT_COST ix86_cost->scalar_stmt_cost
+
+/* Cost of scalar load. */
+#undef TARG_SCALAR_LOAD_COST
+#define TARG_SCALAR_LOAD_COST ix86_cost->scalar_load_cost
+
+/* Cost of scalar store. */
+#undef TARG_SCALAR_STORE_COST
+#define TARG_SCALAR_STORE_COST ix86_cost->scalar_store_cost
+
+/* Cost of any vector operation, excluding load, store or vector to scalar
+ operation. */
+#undef TARG_VEC_STMT_COST
+#define TARG_VEC_STMT_COST ix86_cost->vec_stmt_cost
+
+/* Cost of vector to scalar operation. */
+#undef TARG_VEC_TO_SCALAR_COST
+#define TARG_VEC_TO_SCALAR_COST ix86_cost->vec_to_scalar_cost
+
+/* Cost of scalar to vector operation. */
+#undef TARG_SCALAR_TO_VEC_COST
+#define TARG_SCALAR_TO_VEC_COST ix86_cost->scalar_to_vec_cost
+
+/* Cost of aligned vector load. */
+#undef TARG_VEC_LOAD_COST
+#define TARG_VEC_LOAD_COST ix86_cost->vec_align_load_cost
+
+/* Cost of misaligned vector load. */
+#undef TARG_VEC_UNALIGNED_LOAD_COST
+#define TARG_VEC_UNALIGNED_LOAD_COST ix86_cost->vec_unalign_load_cost
+
+/* Cost of vector store. */
+#undef TARG_VEC_STORE_COST
+#define TARG_VEC_STORE_COST ix86_cost->vec_store_cost
+
+/* Cost of conditional taken branch for vectorizer cost model. */
+#undef TARG_COND_TAKEN_BRANCH_COST
+#define TARG_COND_TAKEN_BRANCH_COST ix86_cost->cond_taken_branch_cost
+
+/* Cost of conditional not taken branch for vectorizer cost model. */
+#undef TARG_COND_NOT_TAKEN_BRANCH_COST
+#define TARG_COND_NOT_TAKEN_BRANCH_COST ix86_cost->cond_not_taken_branch_cost
+
+/*
+Local variables:
+version-control: t
+End:
+*/
diff --git a/gcc-4.4.0/gcc/config/i386/i386.md b/gcc-4.4.0/gcc/config/i386/i386.md
new file mode 100644
index 000000000..efde73b7a
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386.md
@@ -0,0 +1,21982 @@
+;; GCC machine description for IA-32 and x86-64.
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+;; Mostly by William Schelter.
+;; x86_64 support added by Jan Hubicka
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The original PO technology requires these to be ordered by speed,
+;; so that the assigner will pick the fastest.
+;;
+;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
+;;
+;; The special asm out single letter directives following a '%' are:
+;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of
+;; operands[1].
+;; 'L' Print the opcode suffix for a 32-bit integer opcode.
+;; 'W' Print the opcode suffix for a 16-bit integer opcode.
+;; 'B' Print the opcode suffix for an 8-bit integer opcode.
+;; 'Q' Print the opcode suffix for a 64-bit float opcode.
+;; 'S' Print the opcode suffix for a 32-bit float opcode.
+;; 'T' Print the opcode suffix for an 80-bit extended real XFmode float opcode.
+;; 'J' Print the appropriate jump operand.
+;;
+;; 'b' Print the QImode name of the register for the indicated operand.
+;; %b0 would print %al if operands[0] is reg 0.
+;; 'w' Likewise, print the HImode name of the register.
+;; 'k' Likewise, print the SImode name of the register.
+;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh.
+;; 'y' Print "st(0)" instead of "st" as a register.
+
+;; UNSPEC usage:
+
+(define_constants
+ [; Relocation specifiers
+ (UNSPEC_GOT 0)
+ (UNSPEC_GOTOFF 1)
+ (UNSPEC_GOTPCREL 2)
+ (UNSPEC_GOTTPOFF 3)
+ (UNSPEC_TPOFF 4)
+ (UNSPEC_NTPOFF 5)
+ (UNSPEC_DTPOFF 6)
+ (UNSPEC_GOTNTPOFF 7)
+ (UNSPEC_INDNTPOFF 8)
+ (UNSPEC_PLTOFF 9)
+ (UNSPEC_MACHOPIC_OFFSET 10)
+
+ ; Prologue support
+ (UNSPEC_STACK_ALLOC 11)
+ (UNSPEC_SET_GOT 12)
+ (UNSPEC_SSE_PROLOGUE_SAVE 13)
+ (UNSPEC_REG_SAVE 14)
+ (UNSPEC_DEF_CFA 15)
+ (UNSPEC_SET_RIP 16)
+ (UNSPEC_SET_GOT_OFFSET 17)
+ (UNSPEC_MEMORY_BLOCKAGE 18)
+
+ ; TLS support
+ (UNSPEC_TP 20)
+ (UNSPEC_TLS_GD 21)
+ (UNSPEC_TLS_LD_BASE 22)
+ (UNSPEC_TLSDESC 23)
+
+ ; Other random patterns
+ (UNSPEC_SCAS 30)
+ (UNSPEC_FNSTSW 31)
+ (UNSPEC_SAHF 32)
+ (UNSPEC_FSTCW 33)
+ (UNSPEC_ADD_CARRY 34)
+ (UNSPEC_FLDCW 35)
+ (UNSPEC_REP 36)
+ (UNSPEC_LD_MPIC 38) ; load_macho_picbase
+ (UNSPEC_TRUNC_NOOP 39)
+
+ ; For SSE/MMX support:
+ (UNSPEC_FIX_NOTRUNC 40)
+ (UNSPEC_MASKMOV 41)
+ (UNSPEC_MOVMSK 42)
+ (UNSPEC_MOVNT 43)
+ (UNSPEC_MOVU 44)
+ (UNSPEC_RCP 45)
+ (UNSPEC_RSQRT 46)
+ (UNSPEC_SFENCE 47)
+ (UNSPEC_PFRCP 49)
+ (UNSPEC_PFRCPIT1 40)
+ (UNSPEC_PFRCPIT2 41)
+ (UNSPEC_PFRSQRT 42)
+ (UNSPEC_PFRSQIT1 43)
+ (UNSPEC_MFENCE 44)
+ (UNSPEC_LFENCE 45)
+ (UNSPEC_PSADBW 46)
+ (UNSPEC_LDDQU 47)
+ (UNSPEC_MS_TO_SYSV_CALL 48)
+
+ ; Generic math support
+ (UNSPEC_COPYSIGN 50)
+ (UNSPEC_IEEE_MIN 51) ; not commutative
+ (UNSPEC_IEEE_MAX 52) ; not commutative
+
+ ; x87 Floating point
+ (UNSPEC_SIN 60)
+ (UNSPEC_COS 61)
+ (UNSPEC_FPATAN 62)
+ (UNSPEC_FYL2X 63)
+ (UNSPEC_FYL2XP1 64)
+ (UNSPEC_FRNDINT 65)
+ (UNSPEC_FIST 66)
+ (UNSPEC_F2XM1 67)
+ (UNSPEC_TAN 68)
+ (UNSPEC_FXAM 69)
+
+ ; x87 Rounding
+ (UNSPEC_FRNDINT_FLOOR 70)
+ (UNSPEC_FRNDINT_CEIL 71)
+ (UNSPEC_FRNDINT_TRUNC 72)
+ (UNSPEC_FRNDINT_MASK_PM 73)
+ (UNSPEC_FIST_FLOOR 74)
+ (UNSPEC_FIST_CEIL 75)
+
+ ; x87 Double output FP
+ (UNSPEC_SINCOS_COS 80)
+ (UNSPEC_SINCOS_SIN 81)
+ (UNSPEC_XTRACT_FRACT 84)
+ (UNSPEC_XTRACT_EXP 85)
+ (UNSPEC_FSCALE_FRACT 86)
+ (UNSPEC_FSCALE_EXP 87)
+ (UNSPEC_FPREM_F 88)
+ (UNSPEC_FPREM_U 89)
+ (UNSPEC_FPREM1_F 90)
+ (UNSPEC_FPREM1_U 91)
+
+ (UNSPEC_C2_FLAG 95)
+ (UNSPEC_FXAM_MEM 96)
+
+ ; SSP patterns
+ (UNSPEC_SP_SET 100)
+ (UNSPEC_SP_TEST 101)
+ (UNSPEC_SP_TLS_SET 102)
+ (UNSPEC_SP_TLS_TEST 103)
+
+ ; SSSE3
+ (UNSPEC_PSHUFB 120)
+ (UNSPEC_PSIGN 121)
+ (UNSPEC_PALIGNR 122)
+
+ ; For SSE4A support
+ (UNSPEC_EXTRQI 130)
+ (UNSPEC_EXTRQ 131)
+ (UNSPEC_INSERTQI 132)
+ (UNSPEC_INSERTQ 133)
+
+ ; For SSE4.1 support
+ (UNSPEC_BLENDV 134)
+ (UNSPEC_INSERTPS 135)
+ (UNSPEC_DP 136)
+ (UNSPEC_MOVNTDQA 137)
+ (UNSPEC_MPSADBW 138)
+ (UNSPEC_PHMINPOSUW 139)
+ (UNSPEC_PTEST 140)
+ (UNSPEC_ROUND 141)
+
+ ; For SSE4.2 support
+ (UNSPEC_CRC32 143)
+ (UNSPEC_PCMPESTR 144)
+ (UNSPEC_PCMPISTR 145)
+
+ ;; For SSE5
+ (UNSPEC_SSE5_INTRINSIC 150)
+ (UNSPEC_SSE5_UNSIGNED_CMP 151)
+ (UNSPEC_SSE5_TRUEFALSE 152)
+ (UNSPEC_SSE5_PERMUTE 153)
+ (UNSPEC_FRCZ 154)
+ (UNSPEC_CVTPH2PS 155)
+ (UNSPEC_CVTPS2PH 156)
+
+ ; For AES support
+ (UNSPEC_AESENC 159)
+ (UNSPEC_AESENCLAST 160)
+ (UNSPEC_AESDEC 161)
+ (UNSPEC_AESDECLAST 162)
+ (UNSPEC_AESIMC 163)
+ (UNSPEC_AESKEYGENASSIST 164)
+
+ ; For PCLMUL support
+ (UNSPEC_PCLMUL 165)
+
+ ; For AVX support
+ (UNSPEC_PCMP 166)
+ (UNSPEC_VPERMIL 167)
+ (UNSPEC_VPERMIL2F128 168)
+ (UNSPEC_MASKLOAD 169)
+ (UNSPEC_MASKSTORE 170)
+ (UNSPEC_CAST 171)
+ (UNSPEC_VTESTP 172)
+ ])
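+
+;; For reference: an unspec wraps its operands in an operation that
+;; ordinary RTL codes cannot express, and insn patterns match on the
+;; constant; e.g. (unspec:SI [(const_int 0)] UNSPEC_TP) stands for a
+;; read of the TLS thread pointer.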
+
+(define_constants
+ [(UNSPECV_BLOCKAGE 0)
+ (UNSPECV_STACK_PROBE 1)
+ (UNSPECV_EMMS 2)
+ (UNSPECV_LDMXCSR 3)
+ (UNSPECV_STMXCSR 4)
+ (UNSPECV_FEMMS 5)
+ (UNSPECV_CLFLUSH 6)
+ (UNSPECV_ALIGN 7)
+ (UNSPECV_MONITOR 8)
+ (UNSPECV_MWAIT 9)
+ (UNSPECV_CMPXCHG 10)
+ (UNSPECV_XCHG 12)
+ (UNSPECV_LOCK 13)
+ (UNSPECV_PROLOGUE_USE 14)
+ (UNSPECV_CLD 15)
+ (UNSPECV_VZEROALL 16)
+ (UNSPECV_VZEROUPPER 17)
+ ])
+
+;; Constants to represent pcomtrue/pcomfalse variants
+(define_constants
+ [(PCOM_FALSE 0)
+ (PCOM_TRUE 1)
+ (COM_FALSE_S 2)
+ (COM_FALSE_P 3)
+ (COM_TRUE_S 4)
+ (COM_TRUE_P 5)
+ ])
+
+;; Constants used in the SSE5 pperm instruction
+(define_constants
+ [(PPERM_SRC 0x00) /* copy source */
+ (PPERM_INVERT 0x20) /* invert source */
+ (PPERM_REVERSE 0x40) /* bit reverse source */
+ (PPERM_REV_INV 0x60) /* bit reverse & invert src */
+ (PPERM_ZERO 0x80) /* all 0's */
+ (PPERM_ONES 0xa0) /* all 1's */
+ (PPERM_SIGN 0xc0) /* propagate sign bit */
+ (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
+ (PPERM_SRC1 0x00) /* use first source byte */
+ (PPERM_SRC2 0x10) /* use second source byte */
+ ])
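+
+;; The operation bits compose by inclusive-or: PPERM_REV_INV (0x60) is
+;; exactly PPERM_INVERT | PPERM_REVERSE, and a selector byte such as
+;; (PPERM_INVERT | PPERM_SRC2 | 3) would request byte 3 of the second
+;; source, inverted (an illustrative reading of the encoding).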
+
+;; Registers by name.
+(define_constants
+ [(AX_REG 0)
+ (DX_REG 1)
+ (CX_REG 2)
+ (BX_REG 3)
+ (SI_REG 4)
+ (DI_REG 5)
+ (BP_REG 6)
+ (SP_REG 7)
+ (ST0_REG 8)
+ (ST1_REG 9)
+ (ST2_REG 10)
+ (ST3_REG 11)
+ (ST4_REG 12)
+ (ST5_REG 13)
+ (ST6_REG 14)
+ (ST7_REG 15)
+ (FLAGS_REG 17)
+ (FPSR_REG 18)
+ (FPCR_REG 19)
+ (XMM0_REG 21)
+ (XMM1_REG 22)
+ (XMM2_REG 23)
+ (XMM3_REG 24)
+ (XMM4_REG 25)
+ (XMM5_REG 26)
+ (XMM6_REG 27)
+ (XMM7_REG 28)
+ (MM0_REG 29)
+ (MM1_REG 30)
+ (MM2_REG 31)
+ (MM3_REG 32)
+ (MM4_REG 33)
+ (MM5_REG 34)
+ (MM6_REG 35)
+ (MM7_REG 36)
+ (R8_REG 37)
+ (R9_REG 38)
+ (R10_REG 39)
+ (R11_REG 40)
+ (R13_REG 42)
+ (XMM8_REG 45)
+ (XMM9_REG 46)
+ (XMM10_REG 47)
+ (XMM11_REG 48)
+ (XMM12_REG 49)
+ (XMM13_REG 50)
+ (XMM14_REG 51)
+ (XMM15_REG 52)
+ ])
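+
+;; These are hard register numbers; the patterns below refer to them
+;; symbolically, e.g. (reg:CC FLAGS_REG) is the condition-code register
+;; (hard register 17) and (reg:SI SP_REG) the stack pointer.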
+
+;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
+;; from i386.c.
+
+;; In C guard expressions, put expressions which may be compile-time
+;; constants first. This allows for better optimization. For
+;; example, write "TARGET_64BIT && reload_completed", not
+;; "reload_completed && TARGET_64BIT".
+
+
+;; Processor type.
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
+ generic64,amdfam10"
+ (const (symbol_ref "ix86_schedule")))
+
+;; A basic instruction type. Refinements due to arguments to be
+;; provided in other attributes.
+(define_attr "type"
+ "other,multi,
+ alu,alu1,negnot,imov,imovx,lea,
+ incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+ icmp,test,ibr,setcc,icmov,
+ push,pop,call,callv,leave,
+ str,bitmanip,
+ fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
+ sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
+ ssemuladd,sse4arg,
+ mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
+ (const_string "other"))
+
+;; Main data type used by the insn
+(define_attr "mode"
+ "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
+ (const_string "unknown"))
+
+;; The CPU unit the operation uses.
+(define_attr "unit" "integer,i387,sse,mmx,unknown"
+ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
+ (const_string "i387")
+ (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+ ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
+ (const_string "sse")
+ (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
+ (const_string "mmx")
+ (eq_attr "type" "other")
+ (const_string "unknown")]
+ (const_string "integer")))
+
+;; The (bounding maximum) length of an instruction immediate.
+(define_attr "length_immediate" ""
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
+ bitmanip")
+ (const_int 0)
+ (eq_attr "unit" "i387,sse,mmx")
+ (const_int 0)
+ (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
+ imul,icmp,push,pop")
+ (symbol_ref "ix86_attr_length_immediate_default(insn,1)")
+ (eq_attr "type" "imov,test")
+ (symbol_ref "ix86_attr_length_immediate_default(insn,0)")
+ (eq_attr "type" "call")
+ (if_then_else (match_operand 0 "constant_call_address_operand" "")
+ (const_int 4)
+ (const_int 0))
+ (eq_attr "type" "callv")
+ (if_then_else (match_operand 1 "constant_call_address_operand" "")
+ (const_int 4)
+ (const_int 0))
+ ;; We don't know the size before shorten_branches. Expect
+ ;; the instruction to fit for better scheduling.
+ (eq_attr "type" "ibr")
+ (const_int 1)
+ ]
+ (symbol_ref "/* Update immediate_length and other attributes! */
+ gcc_unreachable (),1")))
+
+;; The (bounding maximum) length of an instruction address.
+(define_attr "length_address" ""
+ (cond [(eq_attr "type" "str,other,multi,fxch")
+ (const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
+ ]
+ (symbol_ref "ix86_attr_length_address_default (insn)")))
+
+;; Set when the operand-size (data16, 0x66) prefix is used.
+(define_attr "prefix_data16" ""
+ (if_then_else (ior (eq_attr "mode" "HI")
+ (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF")))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when the string REP prefix (the same byte as the scalar SSE
+;; F3/F2 prefix) is used.
+(define_attr "prefix_rep" ""
+ (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF"))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when 0f opcode prefix is used.
+(define_attr "prefix_0f" ""
+ (if_then_else
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
+ (eq_attr "unit" "sse,mmx"))
+ (const_int 1)
+ (const_int 0)))
+
+;; Set when REX opcode prefix is used.
+(define_attr "prefix_rex" ""
+ (cond [(and (eq_attr "mode" "DI")
+ (eq_attr "type" "!push,pop,call,callv,leave,ibr"))
+ (const_int 1)
+ (and (eq_attr "mode" "QI")
+ (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)")
+ (const_int 0)))
+ (const_int 1)
+ (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)")
+ (const_int 0))
+ (const_int 1)
+ ]
+ (const_int 0)))
+
+;; There are also additional prefixes in SSSE3.
+(define_attr "prefix_extra" "" (const_int 0))
+
+;; Prefix used: original, VEX or maybe VEX.
+(define_attr "prefix" "orig,vex,maybe_vex"
+ (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
+ (const_string "vex")
+ (const_string "orig")))
+
+;; Set when there is an 8-bit immediate for a VEX-encoded insn.
+(define_attr "prefix_vex_imm8" "" (const_int 0))
+
+;; VEX W bit is used.
+(define_attr "prefix_vex_w" "" (const_int 0))
+
+;; The length of the VEX prefix.
+(define_attr "length_vex" ""
+ (if_then_else (eq_attr "prefix_0f" "1")
+ (if_then_else (eq_attr "prefix_vex_w" "1")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)"))
+ (if_then_else (eq_attr "prefix_vex_w" "1")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)")
+ (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)"))))
+
+;; Set when modrm byte is used.
+(define_attr "modrm" ""
+ (cond [(eq_attr "type" "str,leave")
+ (const_int 0)
+ (eq_attr "unit" "i387")
+ (const_int 0)
+ (and (eq_attr "type" "incdec")
+ (ior (match_operand:SI 1 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "push")
+ (not (match_operand 1 "memory_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "pop")
+ (not (match_operand 0 "memory_operand" "")))
+ (const_int 0)
+ (and (eq_attr "type" "imov")
+ (ior (and (match_operand 0 "register_operand" "")
+ (match_operand 1 "immediate_operand" ""))
+ (ior (and (match_operand 0 "ax_reg_operand" "")
+ (match_operand 1 "memory_displacement_only_operand" ""))
+ (and (match_operand 0 "memory_displacement_only_operand" "")
+ (match_operand 1 "ax_reg_operand" "")))))
+ (const_int 0)
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_int 0)
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_int 0)
+ ]
+ (const_int 1)))
+
+;; The (bounding maximum) length of an instruction in bytes.
+;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences.
+;; Later we may want to split them and compute proper length as for
+;; other insns.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "other,multi,fistp,frndint")
+ (const_int 16)
+ (eq_attr "type" "fcmp")
+ (const_int 4)
+ (eq_attr "unit" "i387")
+ (plus (const_int 2)
+ (plus (attr "prefix_data16")
+ (attr "length_address")))
+ (ior (eq_attr "prefix" "vex")
+ (and (eq_attr "prefix" "maybe_vex")
+ (ne (symbol_ref "TARGET_AVX") (const_int 0))))
+ (plus (attr "length_vex")
+ (plus (attr "prefix_vex_imm8")
+ (plus (attr "modrm")
+ (attr "length_address"))))]
+ (plus (plus (attr "modrm")
+ (plus (attr "prefix_0f")
+ (plus (attr "prefix_rex")
+ (plus (attr "prefix_extra")
+ (const_int 1)))))
+ (plus (attr "prefix_rep")
+ (plus (attr "prefix_data16")
+ (plus (attr "length_immediate")
+ (attr "length_address")))))))
+
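+;; As a worked example of the default arm above (a sketch, assuming no
+;; prefixes): a register-to-register add{l} has modrm 1, prefix_0f 0,
+;; prefix_rex 0 and prefix_extra 0, plus 1 opcode byte and no
+;; rep/data16/immediate/address bytes, for a bound of 2, matching the
+;; 2-byte encoding of "addl %ebx, %eax".
+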
+;; The `memory' attribute is `none' if no memory is referenced, `load',
+;; `store' or `both' if there is a simple memory reference therein, or
+;; `unknown' if the instruction is complex.
+
+(define_attr "memory" "none,load,store,both,unknown"
+ (cond [(eq_attr "type" "other,multi,str")
+ (const_string "unknown")
+ (eq_attr "type" "lea,fcmov,fpspc")
+ (const_string "none")
+ (eq_attr "type" "fistp,leave")
+ (const_string "both")
+ (eq_attr "type" "frndint")
+ (const_string "load")
+ (eq_attr "type" "push")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "both")
+ (const_string "store"))
+ (eq_attr "type" "pop")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "both")
+ (const_string "load"))
+ (eq_attr "type" "setcc")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "store")
+ (const_string "none"))
+ (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp")
+ (if_then_else (ior (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "load")
+ (const_string "none"))
+ (eq_attr "type" "ibr")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "load")
+ (const_string "none"))
+ (eq_attr "type" "call")
+ (if_then_else (match_operand 0 "constant_call_address_operand" "")
+ (const_string "none")
+ (const_string "load"))
+ (eq_attr "type" "callv")
+ (if_then_else (match_operand 1 "constant_call_address_operand" "")
+ (const_string "none")
+ (const_string "load"))
+ (and (eq_attr "type" "alu1,negnot,ishift1,sselog1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "both")
+ (and (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "both")
+ (match_operand 0 "memory_operand" "")
+ (const_string "store")
+ (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (and (eq_attr "type"
+ "!alu1,negnot,ishift1,
+ imov,imovx,icmp,test,bitmanip,
+ fmov,fcmp,fsgn,
+ sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
+ sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
+ (match_operand 2 "memory_operand" ""))
+ (const_string "load")
+ (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
+ (match_operand 3 "memory_operand" ""))
+ (const_string "load")
+ ]
+ (const_string "none")))
+
+;; Indicates if an instruction has both an immediate and a displacement.
+
+(define_attr "imm_disp" "false,true,unknown"
+ (cond [(eq_attr "type" "other,multi")
+ (const_string "unknown")
+ (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1")
+ (and (match_operand 0 "memory_displacement_operand" "")
+ (match_operand 1 "immediate_operand" "")))
+ (const_string "true")
+ (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+ (and (match_operand 0 "memory_displacement_operand" "")
+ (match_operand 2 "immediate_operand" "")))
+ (const_string "true")
+ ]
+ (const_string "false")))
+
+;; Indicates if an FP operation has an integer source.
+
+(define_attr "fp_int_src" "false,true"
+ (const_string "false"))
+
+;; Defines rounding mode of an FP operation.
+
+(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
+ (const_string "any"))
+
+;; Describe a user's asm statement.
+(define_asm_attributes
+ [(set_attr "length" "128")
+ (set_attr "type" "multi")])
+
+;; All integer comparison codes.
+(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu ])
+
+;; All floating-point comparison codes.
+(define_code_iterator fp_cond [unordered ordered
+ uneq unge ungt unle unlt ltgt ])
+
+(define_code_iterator plusminus [plus minus])
+
+(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
+
+;; Base name for define_insn
+(define_code_attr plusminus_insn
+ [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
+ (minus "sub") (ss_minus "sssub") (us_minus "ussub")])
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminus_mnemonic
+ [(plus "add") (ss_plus "adds") (us_plus "addus")
+ (minus "sub") (ss_minus "subs") (us_minus "subus")])
+
+;; Mark commutative operators as such in constraints.
+(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
+ (minus "") (ss_minus "") (us_minus "")])
+
+;; Mapping of signed max and min
+(define_code_iterator smaxmin [smax smin])
+
+;; Mapping of unsigned max and min
+(define_code_iterator umaxmin [umax umin])
+
+;; Mapping of signed/unsigned max and min
+(define_code_iterator maxmin [smax smin umax umin])
+
+;; Base name for integer and FP insn mnemonic
+(define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
+ (umax "maxu") (umin "minu")])
+(define_code_attr maxminfprefix [(smax "max") (smin "min")])
+
+;; Mapping of parallel logic operators
+(define_code_iterator plogic [and ior xor])
+
+;; Base name for insn mnemonic.
+(define_code_attr plogicprefix [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of abs neg operators
+(define_code_iterator absneg [abs neg])
+
+;; Base name for x87 insn mnemonic.
+(define_code_attr absnegprefix [(abs "abs") (neg "chs")])
+
+;; All single word integer modes.
+(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
+
+;; Single word integer modes without QImode.
+(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+
+;; Instruction suffix for integer modes.
+(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+
+;; Register class for integer modes.
+(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
+
+;; Immediate operand constraint for integer modes.
+(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")])
+
+;; General operand predicate for integer modes.
+(define_mode_attr general_operand
+ [(QI "general_operand")
+ (HI "general_operand")
+ (SI "general_operand")
+ (DI "x86_64_general_operand")])
+
+;; SSE and x87 SFmode and DFmode floating point modes
+(define_mode_iterator MODEF [SF DF])
+
+;; All x87 floating point modes
+(define_mode_iterator X87MODEF [SF DF XF])
+
+;; All integer modes handled by x87 fisttp operator.
+(define_mode_iterator X87MODEI [HI SI DI])
+
+;; All integer modes handled by integer x87 operators.
+(define_mode_iterator X87MODEI12 [HI SI])
+
+;; All integer modes handled by SSE cvtts?2si* operators.
+(define_mode_iterator SSEMODEI24 [SI DI])
+
+;; SSE asm suffix for floating point modes
+(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")])
+
+;; SSE vector mode corresponding to a scalar mode
+(define_mode_attr ssevecmode
+ [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
+
+;; Instruction suffix for REX 64bit operators.
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
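+
+;; For example, one pattern using (match_operand:P ...) yields an SImode
+;; and a DImode variant whose guards "Pmode == SImode" and
+;; "Pmode == DImode" are mutually exclusive, so exactly one variant can
+;; match on any given target.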
+
+
+;; Scheduling descriptions
+
+(include "pentium.md")
+(include "ppro.md")
+(include "k6.md")
+(include "athlon.md")
+(include "geode.md")
+
+
+;; Operand and operator predicates and constraints
+
+(include "predicates.md")
+(include "constraints.md")
+
+
+;; Compare instructions.
+
+;; All compare insns have expanders that save the operands away without
+;; actually generating RTL. The bCOND or sCOND (emitted immediately
+;; after the cmp) will actually emit the cmpM.
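+
+;; As a sketch of the flow: for "if (a < b)" the middle end first expands
+;; cmpsi, whose expander below merely records a and b in
+;; ix86_compare_op0/ix86_compare_op1 and emits nothing; the subsequent
+;; blt (or slt) expansion then emits the actual cmp{l} together with the
+;; jump or setcc.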
+
+(define_expand "cmpti"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "x86_64_general_operand" "")))]
+ "TARGET_64BIT"
+{
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[0] = force_reg (TImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpdi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "x86_64_general_operand" "")))]
+ ""
+{
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[0] = force_reg (DImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpsi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SI 0 "cmpsi_operand" "")
+ (match_operand:SI 1 "general_operand" "")))]
+ ""
+{
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[0] = force_reg (SImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmphi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" "")))]
+ ""
+{
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[0] = force_reg (HImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmpqi"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+{
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[0] = force_reg (QImode, operands[0]);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_insn "cmpdi_ccno_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr")
+ (match_operand:DI 1 "const0_operand" "")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{q}\t%0, %0
+ cmp{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*cmpdi_minus_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:DI 1 "x86_64_general_operand" "re,mr"))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "DI")])
+
+(define_expand "cmpdi_1_rex64"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" "")))]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "cmpdi_1_insn_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 0 "nonimmediate_operand" "mr,r")
+ (match_operand:DI 1 "x86_64_general_operand" "re,mr")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "DI")])
+
+
+(define_insn "*cmpsi_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr")
+ (match_operand:SI 1 "const0_operand" "")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{l}\t%0, %0
+ cmp{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*cmpsi_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:SI 1 "general_operand" "ri,mr"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "SI")])
+
+(define_expand "cmpsi_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "*cmpsi_1_insn"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:SI 1 "general_operand" "ri,mr")))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "SI")])
+
+(define_insn "*cmphi_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr")
+ (match_operand:HI 1 "const0_operand" "")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{w}\t%0, %0
+ cmp{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "HI")])
+
+(define_insn "*cmphi_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:HI 1 "general_operand" "rn,mr"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "HI")])
+
+(define_insn "*cmphi_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r")
+ (match_operand:HI 1 "general_operand" "rn,mr")))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "HI")])
+
+(define_insn "*cmpqi_ccno_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq")
+ (match_operand:QI 1 "const0_operand" "")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "@
+ test{b}\t%0, %0
+ cmp{b}\t{$0, %0|%0, 0}"
+ [(set_attr "type" "test,icmp")
+ (set_attr "length_immediate" "0,1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_1"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q")
+ (match_operand:QI 1 "general_operand" "qn,mq")))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_minus_1"
+ [(set (reg FLAGS_REG)
+ (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q")
+ (match_operand:QI 1 "general_operand" "qn,mq"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+ "cmp{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "general_operand" "Qm")
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (match_operand:QI 0 "register_operand" "Q")
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "const0_operand" "")))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t%h0, %h0"
+ [(set_attr "type" "test")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_expand "cmpqi_ext_3"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "general_operand" "")))]
+ ""
+ "")
+
+(define_insn "cmpqi_ext_3_insn"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "general_operand" "Qmn")))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "cmpqi_ext_3_insn_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (match_operand:QI 1 "nonmemory_operand" "Qn")))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+(define_insn "*cmpqi_ext_4"
+ [(set (reg FLAGS_REG)
+ (compare
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)
+ (subreg:QI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)))]
+ "ix86_match_ccmode (insn, CCmode)"
+ "cmp{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "QI")])
+
+;; These implement floating point compares.
+;; %%% See if we can get away with VOIDmode operands on the actual insns,
+;; which would allow mixing and matching FP modes on the compares.  That is
+;; what the old patterns did, but with many more of them.
+
+(define_expand "cmpxf"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:XF 0 "nonmemory_operand" "")
+ (match_operand:XF 1 "nonmemory_operand" "")))]
+ "TARGET_80387"
+{
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+(define_expand "cmp<mode>"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "")
+ (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ DONE;
+})
+
+;; FP compares, step 1:
+;; Set the FP condition codes.
+;;
+;; CCFPmode compare with exceptions
+;; CCFPUmode compare with no exceptions
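+
+;; (Background, not taken from this file: an ordered compare such as
+;; fcom/comiss raises the invalid exception even for quiet NaN operands,
+;; hence CCFPmode, while fucom/ucomiss signals only on signaling NaNs,
+;; hence CCFPUmode.)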
+
+;; We may not use "#" to split and emit these, since the REG_DEAD notes
+;; used to manage the reg stack popping would not be preserved.
+
+(define_insn "*cmpfp_0"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" ""))]
+ UNSPEC_FNSTSW))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_0_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" "")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_SAHF && !TARGET_CMOVE
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn "*cmpfp_xf"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:XF 1 "register_operand" "f")
+ (match_operand:XF 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "XF")])
+
+(define_insn_and_split "*cmpfp_xf_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand:XF 1 "register_operand" "f")
+ (match_operand:XF 2 "register_operand" "f")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "TARGET_80387
+ && TARGET_SAHF && !TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cmpfp_<mode>"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand:MODEF 1 "register_operand" "f")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm"))]
+ UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand:MODEF 1 "register_operand" "f")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "TARGET_80387
+ && TARGET_SAHF && !TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*cmpfp_u"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFPU
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f"))]
+ UNSPEC_FNSTSW))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "* return output_fp_compare (insn, operands, 0, 1);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn_and_split "*cmpfp_u_cc"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU
+ (match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_SAHF && !TARGET_CMOVE
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFPU (match_dup 1)(match_dup 2))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))])
+
+(define_insn "*cmpfp_<mode>"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operator 3 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
+ UNSPEC_FNSTSW))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+ && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+ "* return output_fp_compare (insn, operands, 0, 0);"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (match_operand 1 "register_operand" "f")
+ (match_operator 3 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "m")])))
+ (clobber (match_operand:HI 0 "register_operand" "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_SAHF && !TARGET_CMOVE
+ && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+ && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (unspec:HI
+ [(compare:CCFP
+ (match_dup 1)
+ (match_op_dup 3 [(match_dup 2)]))]
+ UNSPEC_FNSTSW))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+ ""
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+;; FP compares, step 2
+;; Move the fpsw to ax.
+
+(define_insn "x86_fnstsw_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))]
+ "TARGET_80387"
+ "fnstsw\t%0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "SI")
+ (set_attr "unit" "i387")])
+
+;; FP compares, step 3
+;; Get ax into flags, general case.
+
+(define_insn "x86_sahf_1"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
+ UNSPEC_SAHF))]
+ "TARGET_SAHF"
+{
+#ifdef HAVE_AS_IX86_SAHF
+ return "sahf";
+#else
+ return ".byte\t0x9e";
+#endif
+}
+ [(set_attr "length" "1")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "mode" "SI")])
+
+;; Pentium Pro can do steps 1 through 3 in one go.
+;; comi*, ucomi*, fcomi*, ficomi*, fucomi* (i387 instructions that set condition codes)
+(define_insn "*cmpfp_i_mixed"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "f,x")
+ (match_operand 1 "nonimmediate_operand" "f,xm")))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "fcmp,ssecomi")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_i_sse"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "x")
+ (match_operand 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_i_i387"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP (match_operand 0 "register_operand" "f")
+ (match_operand 1 "register_operand" "f")))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && TARGET_CMOVE
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 0);"
+ [(set_attr "type" "fcmp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_iu_mixed"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "f,x")
+ (match_operand 1 "nonimmediate_operand" "f,xm")))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "fcmp,ssecomi")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_iu_sse"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "x")
+ (match_operand 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (if_then_else (match_operand:SF 1 "" "")
+ (const_string "SF")
+ (const_string "DF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*cmpfp_iu_387"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU (match_operand 0 "register_operand" "f")
+ (match_operand 1 "register_operand" "f")))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+ && TARGET_CMOVE
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+ "* return output_fp_compare (insn, operands, 1, 1);"
+ [(set_attr "type" "fcmp")
+ (set (attr "mode")
+ (cond [(match_operand:SF 1 "" "")
+ (const_string "SF")
+ (match_operand:DF 1 "" "")
+ (const_string "DF")
+ ]
+ (const_string "XF")))
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
+
+;; Move instructions.
+
+;; General case of fullword move.
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (SImode, operands); DONE;")
+
+;; Push/pop instructions. They are separate since autoinc/dec is not a
+;; general_operand.
+;;
+;; %%% We don't use a post-inc memory reference because x86 is not a
+;; general AUTO_INC_DEC host, which impacts how it is treated in flow.
+;; Changing this impacts compiler performance on other non-AUTO_INC_DEC
+;; targets that lack our peculiarities, and it is just as easy to
+;; represent this differently.
+
+(define_insn "*pushsi2"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
+ "!TARGET_64BIT"
+ "push{l}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushsi2_rex64"
+ [(set (match_operand:SI 0 "push_operand" "=X")
+ (match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+(define_insn "*pushsi2_prologue"
+ [(set (match_operand:SI 0 "push_operand" "=<")
+ (match_operand:SI 1 "general_no_elim_operand" "ri*m"))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "push{l}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+(define_insn "*popsi1_epilogue"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+ (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "pop{l}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "SI")])
+
+(define_insn "popsi1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
+ (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG) (const_int 4)))]
+ "!TARGET_64BIT"
+ "pop{l}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movsi_xor"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "const0_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ "xor{l}\t%0, %0"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movsi_or"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && operands[1] == constm1_rtx"
+{
+ operands[1] = constm1_rtx;
+ return "or{l}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "1")])
+
+(define_insn "*movsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand"
+ "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
+ (match_operand:SI 1 "general_operand"
+ "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SSELOG1:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "%vpxor\t%0, %d0";
+ return "%vxorps\t%0, %d0";
+
+ case TYPE_SSEMOV:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_TI:
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_SI:
+ return "%vmovd\t{%1, %0|%0, %1}";
+ case MODE_SF:
+ return "%vmovss\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ case TYPE_MMX:
+ return "pxor\t%0, %0";
+
+ case TYPE_MMXMOV:
+ if (get_attr_mode (insn) == MODE_DI)
+ return "movq\t{%1, %0|%0, %1}";
+ return "movd\t{%1, %0|%0, %1}";
+
+ case TYPE_LEA:
+ return "lea{l}\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+ return "mov{l}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "mmx")
+ (eq_attr "alternative" "3,4,5")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "6")
+ (const_string "sselog1")
+ (eq_attr "alternative" "7,8,9,10,11")
+ (const_string "ssemov")
+ (match_operand:DI 1 "pic_32bit_operand" "")
+ (const_string "lea")
+ ]
+ (const_string "imov")))
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (const_string "DI")
+ (eq_attr "alternative" "6,7")
+ (if_then_else
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (and (eq_attr "alternative" "8,9,10,11")
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (const_string "SF")
+ ]
+ (const_string "SI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabssi_1_rex64"
+ [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:SI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{l}\t{%1, %P0|%P0, %1}
+ mov{l}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movabssi_2_rex64"
+ [(set (match_operand:SI 0 "register_operand" "=a,r")
+ (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{l}\t{%P1, %0|%0, %P1}
+ mov{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "SI")])
+
+(define_insn "*swapsi"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (match_operand:SI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ ""
+ "xchg{l}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (HImode, operands); DONE;")
+
+(define_insn "*pushhi2"
+ [(set (match_operand:HI 0 "push_operand" "=X")
+ (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
+ "!TARGET_64BIT"
+ "push{l}\t%k1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushhi2_rex64"
+ [(set (match_operand:HI 0 "push_operand" "=X")
+ (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+ (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ /* movzwl is faster than movw on p2 due to partial word stalls,
+ though not as fast as an aligned movl. */
+ return "movz{wl|x}\t{%1, %k0|%k0, %1}";
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{w}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
+ (const_string "imov")
+ (and (eq_attr "alternative" "0")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand" ""))
+ (const_string "imov")
+ (and (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))
+ (eq_attr "alternative" "0,2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "alternative" "1,2")
+ (match_operand:HI 1 "aligned_operand" ""))
+ (const_string "SI")
+ (and (eq_attr "alternative" "0")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_HIMODE_MATH")
+ (const_int 0))))
+ (const_string "SI")
+ ]
+ (const_string "HI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available.
+(define_insn "*movabshi_1_rex64"
+ [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:HI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{w}\t{%1, %P0|%P0, %1}
+ mov{w}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*movabshi_2_rex64"
+ [(set (match_operand:HI 0 "register_operand" "=a,r")
+ (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{w}\t{%P1, %0|%0, %P1}
+ mov{w}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "HI")])
+
+(define_insn "*swaphi_1"
+ [(set (match_operand:HI 0 "register_operand" "+r")
+ (match_operand:HI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "xchg{l}\t%k1, %k0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
+
+;; amdfam10_decode is not added since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10.
+(define_insn "*swaphi_2"
+ [(set (match_operand:HI 0 "register_operand" "+r")
+ (match_operand:HI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_PARTIAL_REG_STALL"
+ "xchg{w}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "HI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstricthi"
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" ""))
+ (match_operand:HI 1 "general_operand" ""))]
+ ""
+{
+ if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+ FAIL;
+  /* Don't generate memory->memory moves, go through a register.  */
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[1] = force_reg (HImode, operands[1]);
+})
+
+(define_insn "*movstricthi_1"
+ [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r"))
+ (match_operand:HI 1 "general_operand" "rn,m"))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "mov{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "HI")])
+
+(define_insn "*movstricthi_xor"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
+ (match_operand:HI 1 "const0_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ "xor{w}\t%0, %0"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "HI")
+ (set_attr "length_immediate" "0")])
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (QImode, operands); DONE;")
+
+;; emit_push_insn, when it calls move_by_pieces, requires an insn to
+;; "push a byte", but we actually use pushl, which has the effect of
+;; rounding the amount pushed up to a word.
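+;;
+;; E.g. pushing a QImode immediate through the pattern below assembles
+;; as a push{l} of the whole word, so %esp still moves by 4 (by 8 with
+;; the push{q} of the 64-bit variant that follows).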
+
+(define_insn "*pushqi2"
+ [(set (match_operand:QI 0 "push_operand" "=X")
+ (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))]
+ "!TARGET_64BIT"
+ "push{l}\t%k1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "SI")])
+
+;; For the 64-bit ABI we always round up to 8 bytes.
+(define_insn "*pushqi2_rex64"
+ [(set (match_operand:QI 0 "push_operand" "=X")
+ (match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))]
+ "TARGET_64BIT"
+ "push{q}\t%q1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+;; The situation is quite tricky as to when to choose a full-sized (SImode)
+;; move over QImode moves.  For a Q_REG -> Q_REG move we use the full size
+;; only on partial-register-dependency machines (such as the AMD Athlon),
+;; where a QImode move incurs an extra dependency, and on partial-register-
+;; stall machines that don't use QImode patterns (where a QImode move causes
+;; a stall on the next instruction).
+;;
+;; For loads of Q_REG to NONQ_REG we use full-sized moves, except on
+;; partial-register-stall machines, where we use QImode instructions since
+;; a partial register stall could otherwise be caused.  Then we use movzx.
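+;;
+;; An illustrative case: on a partial-register-dependency machine,
+;; "movb %bl, %al" leaves the result dependent on the stale upper bits
+;; of %eax, while a full-size "movzbl %bl, %eax" starts a fresh
+;; dependency chain, hence the SImode alternatives below.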
+(define_insn "*movqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
+ (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]));
+ return "movz{bl|x}\t{%1, %k0|%k0, %1}";
+ default:
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else
+ return "mov{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "5")
+ (not (match_operand:QI 1 "aligned_operand" "")))
+ (const_string "imovx")
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
+ (const_string "imov")
+ (and (eq_attr "alternative" "3")
+ (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0))))
+ (const_string "imov")
+ (eq_attr "alternative" "3,5")
+ (const_string "imovx")
+ (and (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))
+ (eq_attr "alternative" "2"))
+ (const_string "imovx")
+ ]
+ (const_string "imov")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,5")
+ (const_string "SI")
+ (eq_attr "alternative" "6")
+ (const_string "QI")
+ (eq_attr "type" "imovx")
+ (const_string "SI")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "0,1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (and (eq (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))))))
+ (const_string "SI")
+ ;; Avoid partial register stalls when not using QImode arithmetic
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "alternative" "0,1")
+ (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_QIMODE_MATH")
+ (const_int 0)))))
+ (const_string "SI")
+ ]
+ (const_string "QI")))])
+
+(define_insn "*swapqi_1"
+ [(set (match_operand:QI 0 "register_operand" "+r")
+ (match_operand:QI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "xchg{l}\t%k1, %k0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+
+;; amdfam10_decode is not added since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10.
+(define_insn "*swapqi_2"
+ [(set (match_operand:QI 0 "register_operand" "+q")
+ (match_operand:QI 1 "register_operand" "+q"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_PARTIAL_REG_STALL"
+ "xchg{b}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")])
+
+(define_expand "movstrictqi"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+ FAIL;
+ /* Don't generate memory->memory moves, go through a register. */
+ if (MEM_P (operands[0]) && MEM_P (operands[1]))
+ operands[1] = force_reg (QImode, operands[1]);
+})
+
+(define_insn "*movstrictqi_1"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (match_operand:QI 1 "general_operand" "*qn,m"))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "mov{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movstrictqi_xor"
+ [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q"))
+ (match_operand:QI 1 "const0_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ "xor{b}\t%0, %0"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movsi_extv_1"
+ [(set (match_operand:SI 0 "register_operand" "=R")
+ (sign_extract:SI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movs{bl|x}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movhi_extv_1"
+ [(set (match_operand:HI 0 "register_operand" "=R")
+ (sign_extract:HI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movs{bl|x}\t{%h1, %k0|%k0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extv_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r")
+ (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*movqi_extv_1_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+ (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movs{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available
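+;; For instance (illustrative): only the accumulator has the 64-bit
+;; absolute-address encoding,
+;;   movabs %al, 0x1122334455667788
+;; while any other register must have the address loaded first,
+;;   movabs $0x1122334455667788, %rdx
+;;   movb   %cl, (%rdx)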
+(define_insn "*movabsqi_1_rex64"
+ [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:QI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{b}\t{%1, %P0|%P0, %1}
+ mov{b}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movabsqi_2_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=a,r")
+ (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{b}\t{%P1, %0|%0, %P1}
+ mov{b}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movdi_extzv_1"
+ [(set (match_operand:DI 0 "register_operand" "=R")
+ (zero_extract:DI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ "TARGET_64BIT"
+ "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movsi_extzv_1"
+ [(set (match_operand:SI 0 "register_operand" "=R")
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))]
+ ""
+ "movz{bl|x}\t{%h1, %0|%0, %h1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*movqi_extzv_2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R")
+ (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (and (match_operand:QI 0 "register_operand" "")
+ (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0))))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "*movqi_extzv_2_rex64"
+ [(set (match_operand:QI 0 "register_operand" "=Q,?R")
+ (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q")
+ (const_int 8)
+ (const_int 8)) 0))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ return "movz{bl|x}\t{%h1, %k0|%k0, %h1}";
+ default:
+ return "mov{b}\t{%h1, %0|%0, %h1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" ""))
+ (ne (symbol_ref "TARGET_MOVX")
+ (const_int 0)))
+ (const_string "imovx")
+ (const_string "imov")))
+ (set (attr "mode")
+ (if_then_else (eq_attr "type" "imovx")
+ (const_string "SI")
+ (const_string "QI")))])
+
+(define_insn "movsi_insv_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SI 1 "general_operand" "Qmn"))]
+ "!TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movsi_insv_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SI 1 "nonmemory_operand" "Qn"))]
+ "TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "movdi_insv_1_rex64"
+ [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:DI 1 "nonmemory_operand" "Qn"))]
+ "TARGET_64BIT"
+ "mov{b}\t{%b1, %h0|%h0, %b1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_insn "*movqi_insv_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q")
+ (const_int 8)))]
+ ""
+ "mov{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "QI")])
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (DImode, operands); DONE;")
+
+(define_insn "*pushdi"
+ [(set (match_operand:DI 0 "push_operand" "=<")
+ (match_operand:DI 1 "general_no_elim_operand" "riF*m"))]
+ "!TARGET_64BIT"
+ "#")
+
+(define_insn "*pushdi2_rex64"
+ [(set (match_operand:DI 0 "push_operand" "=<,!<")
+ (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))]
+ "TARGET_64BIT"
+ "@
+ push{q}\t%1
+ #"
+ [(set_attr "type" "push,multi")
+ (set_attr "mode" "DI")])
+
+;; Convert impossible pushes of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  If this
+;; fails, push the sign-extended lower part first and then overwrite
+;; the upper part with a 32-bit move.
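+;; For example (illustrative), pushing an immediate with no
+;; sign-extended 32-bit form, say 0x1234567890abcdef, becomes
+;;   movabsq $0x1234567890abcdef, %rax
+;;   pushq   %rax
+;; or, when no scratch register is available,
+;;   pushq   $0x90abcdef             ;; sign-extends, upper half wrong
+;;   movl    $0x12345678, 4(%rsp)    ;; overwrite the upper 32 bits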
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; We need to define this as both a peephole and a splitter for the
+;; case where the peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode) && 1"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "split_di (&operands[1], 1, &operands[2], &operands[3]);
+ operands[1] = gen_lowpart (DImode, operands[2]);
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (4)));
+ ")
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)
+ && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "split_di (&operands[1], 1, &operands[2], &operands[3]);
+ operands[1] = gen_lowpart (DImode, operands[2]);
+ operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+ GEN_INT (4)));
+ ")
+
+(define_insn "*pushdi2_prologue_rex64"
+ [(set (match_operand:DI 0 "push_operand" "=<")
+ (match_operand:DI 1 "general_no_elim_operand" "re*m"))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "push{q}\t%1"
+ [(set_attr "type" "push")
+ (set_attr "mode" "DI")])
+
+(define_insn "*popdi1_epilogue_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
+ (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG)
+ (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "pop{q}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "DI")])
+
+(define_insn "popdi1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
+ (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG)
+ (plus:DI (reg:DI SP_REG) (const_int 8)))]
+ "TARGET_64BIT"
+ "pop{q}\t%0"
+ [(set_attr "type" "pop")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movdi_xor_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "const0_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && reload_completed"
+ "xor{l}\t%k0, %k0";
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movdi_or_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "const_int_operand" "i"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && reload_completed
+ && operands[1] == constm1_rtx"
+{
+ operands[1] = constm1_rtx;
+ return "or{q}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "1")])
+
+(define_insn "*movdi_2"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x")
+ (match_operand:DI 1 "general_operand"
+ "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))]
+ "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ #
+ #
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ %vpxor\t%0, %d0
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovdqa\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
+
+(define_split
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; %%% This multiword shite has got to go.
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:DI 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]))
+ && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*movdi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand"
+ "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
+ (match_operand:DI 1 "general_operand"
+ "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_SSECVT:
+ if (SSE_REG_P (operands[0]))
+ return "movq2dq\t{%1, %0|%0, %1}";
+ else
+ return "movdq2q\t{%1, %0|%0, %1}";
+
+ case TYPE_SSEMOV:
+ if (TARGET_AVX)
+ {
+ if (get_attr_mode (insn) == MODE_TI)
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ else
+ return "vmovq\t{%1, %0|%0, %1}";
+ }
+
+ if (get_attr_mode (insn) == MODE_TI)
+ return "movdqa\t{%1, %0|%0, %1}";
+ /* FALLTHRU */
+
+ case TYPE_MMXMOV:
+ /* Moves from and to an integer register are done using the movd
+ opcode with a REX prefix.  */
+ if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
+ return "movd\t{%1, %0|%0, %1}";
+ return "movq\t{%1, %0|%0, %1}";
+
+ case TYPE_SSELOG1:
+ return "%vpxor\t%0, %d0";
+
+ case TYPE_MMX:
+ return "pxor\t%0, %0";
+
+ case TYPE_MULTI:
+ return "#";
+
+ case TYPE_LEA:
+ return "lea{q}\t{%a1, %0|%0, %a1}";
+
+ default:
+ gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
+ if (get_attr_mode (insn) == MODE_SI)
+ return "mov{l}\t{%k1, %k0|%k0, %k1}";
+ else if (which_alternative == 2)
+ return "movabs{q}\t{%1, %0|%0, %1}";
+ else
+ return "mov{q}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "5")
+ (const_string "mmx")
+ (eq_attr "alternative" "6,7,8,9,10")
+ (const_string "mmxmov")
+ (eq_attr "alternative" "11")
+ (const_string "sselog1")
+ (eq_attr "alternative" "12,13,14,15,16")
+ (const_string "ssemov")
+ (eq_attr "alternative" "17,18")
+ (const_string "ssecvt")
+ (eq_attr "alternative" "4")
+ (const_string "multi")
+ (match_operand:DI 1 "pic_32bit_operand" "")
+ (const_string "lea")
+ ]
+ (const_string "imov")))
+ (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
+
+;; Stores and loads of ax to an arbitrary constant address.
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when rax is not available
+(define_insn "*movabsdi_1_rex64"
+ [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+ (match_operand:DI 1 "nonmemory_operand" "a,er"))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+ "@
+ movabs{q}\t{%1, %P0|%P0, %1}
+ mov{q}\t{%1, %a0|%a0, %1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*movabsdi_2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a,r")
+ (mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+ "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+ "@
+ movabs{q}\t{%P1, %0|%0, %P1}
+ mov{q}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0,*")
+ (set_attr "length_address" "8,0")
+ (set_attr "length_immediate" "0")
+ (set_attr "memory" "load")
+ (set_attr "mode" "DI")])
+
+;; Convert impossible stores of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  If this
+;; fails, move by 32-bit parts.
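+;; For example (illustrative, with "mem" standing for the destination),
+;; such a store becomes either
+;;   movabsq $0x1234567890abcdef, %rax
+;;   movq    %rax, mem
+;; or, when no scratch register is available,
+;;   movl    $0x90abcdef, mem        ;; low 32 bits
+;;   movl    $0x12345678, mem+4      ;; high 32 bits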
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; We need to define this as both a peephole and a splitter for the
+;; case where the peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode) && 1"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "split_di (&operands[0], 2, &operands[2], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)
+ && !symbolic_operand (operands[1], DImode)
+ && !x86_64_immediate_operand (operands[1], DImode)"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "split_di (&operands[0], 2, &operands[2], &operands[4]);")
+
+(define_insn "*swapdi_rex64"
+ [(set (match_operand:DI 0 "register_operand" "+r")
+ (match_operand:DI 1 "register_operand" "+r"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_64BIT"
+ "xchg{q}\t%1, %0"
+ [(set_attr "type" "imov")
+ (set_attr "mode" "DI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_expand "movoi"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "")
+ (match_operand:OI 1 "general_operand" ""))]
+ "TARGET_AVX"
+ "ix86_expand_move (OImode, operands); DONE;")
+
+(define_insn "*movoi_internal"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vxorps\t%0, %0, %0";
+ case 1:
+ case 2:
+ if (misaligned_operand (operands[0], OImode)
+ || misaligned_operand (operands[1], OImode))
+ return "vmovdqu\t{%1, %0|%0, %1}";
+ else
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_expand "movti"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE || TARGET_64BIT"
+{
+ if (TARGET_64BIT)
+ ix86_expand_move (TImode, operands);
+ else if (push_operand (operands[0], TImode))
+ ix86_expand_push (TImode, operands[1]);
+ else
+ ix86_expand_vector_move (TImode, operands);
+ DONE;
+})
+
+(define_insn "*movti_internal"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+ (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+ "TARGET_SSE && !TARGET_64BIT
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ case 1:
+ case 2:
+ /* TDmode values are passed as TImode on the stack.  Moving them
+ to the stack may result in unaligned memory accesses.  */
+ if (misaligned_operand (operands[0], TImode)
+ || misaligned_operand (operands[1], TImode))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovups\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set (attr "mode")
+ (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)))
+ (const_string "V4SF")
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0)))
+ (const_string "V4SF")]
+ (const_string "TI")))])
+
+(define_insn "*movti_rex64"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm")
+ (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return "#";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ case 3:
+ case 4:
+ /* TDmode values are passed as TImode on the stack.  Moving them
+ to the stack may result in unaligned memory accesses.  */
+ if (misaligned_operand (operands[0], TImode)
+ || misaligned_operand (operands[1], TImode))
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovups\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqu\t{%1, %0|%0, %1}";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+ (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "2,3")
+ (if_then_else
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "4")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (match_operand:TI 1 "general_operand" ""))]
+ "reload_completed && !SSE_REG_P (operands[0])
+ && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; This expands to what emit_move_complex would generate if we didn't
+;; have a movti pattern. Having this avoids problems with reload on
+;; 32-bit targets when SSE is present, but doesn't seem to be harmful
+;; to have around all the time.
+(define_expand "movcdi"
+ [(set (match_operand:CDI 0 "nonimmediate_operand" "")
+ (match_operand:CDI 1 "general_operand" ""))]
+ ""
+{
+ if (push_operand (operands[0], CDImode))
+ emit_move_complex_push (CDImode, operands[0], operands[1]);
+ else
+ emit_move_complex_parts (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (match_operand:SF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (SFmode, operands); DONE;")
+
+(define_insn "*pushsf"
+ [(set (match_operand:SF 0 "push_operand" "=<,<,<")
+ (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))]
+ "!TARGET_64BIT"
+{
+ /* Anything else should have been split before reg-stack.  */
+ gcc_assert (which_alternative == 1);
+ return "push{l}\t%1";
+}
+ [(set_attr "type" "multi,push,multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "SF,SI,SF")])
+
+(define_insn "*pushsf_rex64"
+ [(set (match_operand:SF 0 "push_operand" "=X,X,X")
+ (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
+ "TARGET_64BIT"
+{
+ /* Anything else should have been split before reg-stack.  */
+ gcc_assert (which_alternative == 1);
+ return "push{q}\t%q1";
+}
+ [(set_attr "type" "multi,push,multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "SF,DI,SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))]
+ "reload_completed
+ && MEM_P (operands[1])
+ && (operands[2] = find_constant_src (insn))"
+ [(set (match_dup 0)
+ (match_dup 2))])
+
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "!TARGET_64BIT"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (set (mem:SF (reg:SI SP_REG)) (match_dup 1))])
+
+(define_split
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "any_fp_register_operand" ""))]
+ "TARGET_64BIT"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (set (mem:SF (reg:DI SP_REG)) (match_dup 1))])
+
+(define_insn "*movsf_1"
+ [(set (match_operand:SF 0 "nonimmediate_operand"
+ "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
+ (match_operand:SF 1 "general_operand"
+ "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
+ && standard_80387_constant_p (operands[1]))
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], SFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "mov{l}\t{%1, %0|%0, %1}";
+ case 5:
+ if (get_attr_mode (insn) == MODE_TI)
+ return "%vpxor\t%0, %d0";
+ else
+ return "%vxorps\t%0, %d0";
+ case 6:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovss\t{%1, %d0|%d0, %1}";
+ case 7:
+ if (TARGET_AVX)
+ return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+ : "vmovss\t{%1, %0|%0, %1}";
+ else
+ return "movss\t{%1, %0|%0, %1}";
+ case 8:
+ return "%vmovss\t{%1, %0|%0, %1}";
+
+ case 9: case 10: case 14: case 15:
+ return "movd\t{%1, %0|%0, %1}";
+ case 12: case 13:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ case 11:
+ return "movq\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "3,4,9,10")
+ (const_string "SI")
+ (eq_attr "alternative" "5")
+ (if_then_else
+ (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE2")
+ (const_int 0)))
+ (eq (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "TI")
+ (const_string "V4SF"))
+ /* For architectures that resolve dependencies on
+ whole SSE registers, use an APS move to break dependency
+ chains; otherwise use a short move to avoid extra work.
+
+ Do the same for architectures that resolve dependencies on
+ the parts.  While in DF mode it is better to always handle
+ just the register parts, SF mode is different due to the
+ lack of instructions to load just part of the register.
+ It is better to maintain whole registers in a single format
+ to avoid problems when using packed logical operations.  */
+ (eq_attr "alternative" "6")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "SF"))
+ (eq_attr "alternative" "11")
+ (const_string "DI")]
+ (const_string "SF")))])
+
+(define_insn "*swapsf"
+ [(set (match_operand:SF 0 "fp_register_operand" "+f")
+ (match_operand:SF 1 "fp_register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "reload_completed || TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "SF")])
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (DFmode, operands); DONE;")
+
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2 + 2*memory operand size.
+;; On average, pushdf using integer instructions can still be shorter.
+;; Allow this pattern for optimize_size too.
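+;; Worked example from the formulas above, assuming a one-byte
+;; (register-indirect) memory operand: the i387 sequence costs
+;; 3 + 2 + 1 = 6 bytes, while two integer pushes cost 2 + 2*1 = 4.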
+
+(define_insn "*pushdf_nointeger"
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
+ "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
+{
+ /* This insn should have been split before reg-stack.  */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*,*")
+ (set_attr "mode" "DF,SI,SI,DF")])
+
+(define_insn "*pushdf_integer"
+ [(set (match_operand:DF 0 "push_operand" "=<,<,<")
+ (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
+ "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+{
+ /* This insn should have been split before reg-stack.  */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "DF,SI,DF")])
+
+;; %%% Kill this when call knows how to work this out.
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "any_fp_register_operand" ""))]
+ "reload_completed"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+ (set (mem:DF (reg:P SP_REG)) (match_dup 1))]
+ "")
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; Moving is usually shorter when only FP registers are used. This separate
+;; movdf pattern avoids the use of integer registers for FP operations
+;; when optimizing for size.
+
+(define_insn "*movdf_nointeger"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ((optimize_function_for_size_p (cfun)
+ || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && optimize_function_for_size_p (cfun)
+ && !memory_operand (operands[0], DFmode)
+ && standard_80387_constant_p (operands[1]))
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || ((optimize_function_for_size_p (cfun)
+ || !TARGET_MEMORY_MISMATCH_STALL
+ || reload_in_progress || reload_completed)
+ && memory_operand (operands[0], DFmode)))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+ case MODE_V2DF:
+ return "%vxorpd\t%0, %d0";
+ case MODE_TI:
+ return "%vpxor\t%0, %d0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "%vmovapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "%vmovq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlpd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]))
+ return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovlps\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4")
+ (const_string "SI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_insn "*movdf_integer_rex64"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && optimize_function_for_size_p (cfun)
+ && standard_80387_constant_p (operands[1]))
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+ case MODE_V2DF:
+ return "%vxorpd\t%0, %d0";
+ case MODE_TI:
+ return "%vpxor\t%0, %d0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "%vmovapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "%vmovq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ if (TARGET_AVX)
+ {
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+ else
+ return "vmovsd\t{%1, %0|%0, %1}";
+ }
+ else
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ return "%vmovlpd\t{%1, %d0|%d0, %1}";
+ case MODE_V2SF:
+ return "%vmovlps\t{%1, %d0|%d0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ case 9:
+ case 10:
+ return "%vmovd\t{%1, %0|%0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+ (const_string "orig")
+ (const_string "maybe_vex")))
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4,9,10")
+ (const_string "DI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_insn "*movdf_integer"
+ [(set (match_operand:DF 0 "nonimmediate_operand"
+ "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ")
+ (match_operand:DF 1 "general_operand"
+ "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && optimize_function_for_speed_p (cfun)
+ && TARGET_INTEGER_DFMODE_MOVES
+ && (reload_in_progress || reload_completed
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+ || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && optimize_function_for_size_p (cfun)
+ && standard_80387_constant_p (operands[1]))
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], DFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3:
+ case 4:
+ return "#";
+
+ case 5:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "xorps\t%0, %0";
+ case MODE_V2DF:
+ return "xorpd\t%0, %0";
+ case MODE_TI:
+ return "pxor\t%0, %0";
+ default:
+ gcc_unreachable ();
+ }
+ case 6:
+ case 7:
+ case 8:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ case MODE_TI:
+ return "movdqa\t{%1, %0|%0, %1}";
+ case MODE_DI:
+ return "movq\t{%1, %0|%0, %1}";
+ case MODE_DF:
+ return "movsd\t{%1, %0|%0, %1}";
+ case MODE_V1DF:
+ return "movlpd\t{%1, %0|%0, %1}";
+ case MODE_V2SF:
+ return "movlps\t{%1, %0|%0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,1,2")
+ (const_string "DF")
+ (eq_attr "alternative" "3,4")
+ (const_string "SI")
+
+ /* For SSE1, we have many fewer alternatives. */
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+ (cond [(eq_attr "alternative" "5,6")
+ (const_string "V4SF")
+ ]
+ (const_string "V2SF"))
+
+ /* xorps is one byte shorter. */
+ (eq_attr "alternative" "5")
+ (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
+ (const_int 0))
+ (const_string "TI")
+ ]
+ (const_string "V2DF"))
+
+ /* For architectures resolving dependencies on
+ whole SSE registers use APD move to break dependency
+ chains, otherwise use short move to avoid extra work.
+
+ movaps encodes one byte shorter. */
+ (eq_attr "alternative" "6")
+ (cond
+ [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+ (const_int 0))
+ (const_string "V2DF")
+ ]
+ (const_string "DF"))
+ /* For architectures resolving dependencies on register
+ parts we may avoid extra work to zero out upper part
+ of register. */
+ (eq_attr "alternative" "7")
+ (if_then_else
+ (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
+ (const_int 0))
+ (const_string "V1DF")
+ (const_string "DF"))
+ ]
+ (const_string "DF")))])
+
+(define_split
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (match_operand:DF 1 "general_operand" ""))]
+ "reload_completed
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ! (ANY_FP_REG_P (operands[0]) ||
+ (GET_CODE (operands[0]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+ && ! (ANY_FP_REG_P (operands[1]) ||
+ (GET_CODE (operands[1]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_insn "*swapdf"
+ [(set (match_operand:DF 0 "fp_register_operand" "+f")
+ (match_operand:DF 1 "fp_register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "reload_completed || TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "DF")])
+
+(define_expand "movxf"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (match_operand:XF 1 "general_operand" ""))]
+ ""
+ "ix86_expand_move (XFmode, operands); DONE;")
+
+;; Size of pushxf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushxf using integer instructions is 3 + 3*memory operand size.
+;; Pushing using integer instructions is longer except for constants
+;; and direct memory references (assuming that any given constant is
+;; pushed only once, but this ought to be handled elsewhere).
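+;; Worked example from the formulas above: with a one-byte memory
+;; operand both forms come to 3 + 2 + 1 = 6 and 3 + 3*1 = 6 bytes;
+;; any larger addressing mode makes the integer sequence longer.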
+
+(define_insn "*pushxf_nointeger"
+ [(set (match_operand:XF 0 "push_operand" "=X,X,X")
+ (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
+ "optimize_function_for_size_p (cfun)"
+{
+ /* This insn should have been split before reg-stack.  */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "mode" "XF,SI,SI")])
+
+(define_insn "*pushxf_integer"
+ [(set (match_operand:XF 0 "push_operand" "=<,<")
+ (match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
+ "optimize_function_for_speed_p (cfun)"
+{
+ /* This insn should have been split before reg-stack.  */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387,*")
+ (set_attr "mode" "XF,SI")])
+
+(define_split
+ [(set (match_operand 0 "push_operand" "")
+ (match_operand 1 "general_operand" ""))]
+ "reload_completed
+ && (GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == DFmode)
+ && !ANY_FP_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (match_operand:XF 1 "any_fp_register_operand" ""))]
+ ""
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+;; Do not use integer registers when optimizing for size
+(define_insn "*movxf_nointeger"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
+ "optimize_function_for_size_p (cfun)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || standard_80387_constant_p (operands[1])
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], XFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3: case 4:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+ (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_insn "*movxf_integer"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
+ "optimize_function_for_speed_p (cfun)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && (reload_in_progress || reload_completed
+ || GET_CODE (operands[1]) != CONST_DOUBLE
+ || memory_operand (operands[0], XFmode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return standard_80387_constant_opcode (operands[1]);
+
+ case 3: case 4:
+ return "#";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,fmov,multi,multi")
+ (set_attr "mode" "XF,XF,XF,SI,SI")])
+
+(define_expand "movtf"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE2"
+{
+ ix86_expand_move (TFmode, operands);
+ DONE;
+})
+
+(define_insn "*movtf_internal"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
+ (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+ "TARGET_SSE2
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vmovaps\t{%1, %0|%0, %1}";
+ else
+ return "%vmovdqa\t{%1, %0|%0, %1}";
+ case 2:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "%vxorps\t%0, %d0";
+ else
+ return "%vpxor\t%0, %d0";
+ case 3:
+ case 4:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+ (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
+ (set (attr "mode")
+ (cond [(eq_attr "alternative" "0,2")
+ (if_then_else
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0))
+ (const_string "V4SF")
+ (const_string "TI"))
+ (eq_attr "alternative" "1")
+ (if_then_else
+ (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))
+ (ne (symbol_ref "optimize_function_for_size_p (cfun)")
+ (const_int 0)))
+ (const_string "V4SF")
+ (const_string "TI"))]
+ (const_string "DI")))])
+
+(define_insn "*pushtf_sse"
+ [(set (match_operand:TF 0 "push_operand" "=<,<,<")
+ (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
+ "TARGET_SSE2"
+{
+ /* This insn should have been split before reg-stack.  */
+ gcc_unreachable ();
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "sse,*,*")
+ (set_attr "mode" "TF,SI,SI")])
+
+(define_split
+ [(set (match_operand:TF 0 "push_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ "TARGET_SSE2 && reload_completed
+ && !SSE_REG_P (operands[1])"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_split
+ [(set (match_operand:TF 0 "push_operand" "")
+ (match_operand:TF 1 "any_fp_register_operand" ""))]
+ "TARGET_SSE2"
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
+ (set (mem:TF (reg:P SP_REG)) (match_dup 1))]
+ "")
+
+(define_split
+ [(set (match_operand 0 "nonimmediate_operand" "")
+ (match_operand 1 "general_operand" ""))]
+ "reload_completed
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && GET_MODE (operands[0]) == XFmode
+ && ! (ANY_FP_REG_P (operands[0]) ||
+ (GET_CODE (operands[0]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
+ && ! (ANY_FP_REG_P (operands[1]) ||
+ (GET_CODE (operands[1]) == SUBREG
+ && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "memory_operand" ""))]
+ "reload_completed
+ && MEM_P (operands[1])
+ && (GET_MODE (operands[0]) == TFmode
+ || GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == SFmode
+ || GET_MODE (operands[0]) == DFmode)
+ && (operands[2] = find_constant_src (insn))"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ rtx c = operands[2];
+ rtx r = operands[0];
+
+ if (GET_CODE (r) == SUBREG)
+ r = SUBREG_REG (r);
+
+ if (SSE_REG_P (r))
+ {
+ if (!standard_sse_constant_p (c))
+ FAIL;
+ }
+ else if (FP_REG_P (r))
+ {
+ if (!standard_80387_constant_p (c))
+ FAIL;
+ }
+ else if (MMX_REG_P (r))
+ FAIL;
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (float_extend (match_operand 1 "memory_operand" "")))]
+ "reload_completed
+ && MEM_P (operands[1])
+ && (GET_MODE (operands[0]) == TFmode
+ || GET_MODE (operands[0]) == XFmode
+ || GET_MODE (operands[0]) == SFmode
+ || GET_MODE (operands[0]) == DFmode)
+ && (operands[2] = find_constant_src (insn))"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ rtx c = operands[2];
+ rtx r = operands[0];
+
+ if (GET_CODE (r) == SUBREG)
+ r = SUBREG_REG (r);
+
+ if (SSE_REG_P (r))
+ {
+ if (!standard_sse_constant_p (c))
+ FAIL;
+ }
+ else if (FP_REG_P (r))
+ {
+ if (!standard_80387_constant_p (c))
+ FAIL;
+ }
+ else if (MMX_REG_P (r))
+ FAIL;
+})
+
+(define_insn "swapxf"
+ [(set (match_operand:XF 0 "register_operand" "+f")
+ (match_operand:XF 1 "register_operand" "+f"))
+ (set (match_dup 1)
+ (match_dup 0))]
+ "TARGET_80387"
+{
+ if (STACK_TOP_P (operands[0]))
+ return "fxch\t%1";
+ else
+ return "fxch\t%0";
+}
+ [(set_attr "type" "fxch")
+ (set_attr "mode" "XF")])
+
+;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
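+;; (e.g. loading -1.0 becomes "fld1" followed by "fchs"; fldz and fld1
+;; can only materialize +0.0 and +1.0 directly, so the negation avoids
+;; a constant-pool load).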
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (match_operand:X87MODEF 1 "immediate_operand" ""))]
+ "reload_completed && FP_REGNO_P (REGNO (operands[0]))
+ && (standard_80387_constant_p (operands[1]) == 8
+ || standard_80387_constant_p (operands[1]) == 9)"
+ [(set (match_dup 0)(match_dup 1))
+ (set (match_dup 0)
+ (neg:X87MODEF (match_dup 0)))]
+{
+ REAL_VALUE_TYPE r;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
+ if (real_isnegzero (&r))
+ operands[1] = CONST0_RTX (<MODE>mode);
+ else
+ operands[1] = CONST1_RTX (<MODE>mode);
+})
+
+(define_split
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (match_operand:TF 1 "general_operand" ""))]
+ "reload_completed
+ && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+;; Zero extension instructions
+
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+ {
+ operands[1] = force_reg (HImode, operands[1]);
+ emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "zero_extendhisi2_and"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && TARGET_ZERO_EXTEND_WITH_AND
+ && optimize_function_for_speed_p (cfun)"
+ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn "*zero_extendhisi2_movzwl"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "!TARGET_ZERO_EXTEND_WITH_AND
+ || optimize_function_for_size_p (cfun)"
+ "movz{wl|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_expand "zero_extendqihi2"
+ [(parallel
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*zero_extendqihi2_and"
+ [(set (match_operand:HI 0 "register_operand" "=r,?&q")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "HI")])
+
+(define_insn "*zero_extendqihi2_movzbw_and"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
+ "#"
+ [(set_attr "type" "imovx,alu1")
+ (set_attr "mode" "HI")])
+
+;; Zero extend to SImode here to avoid partial register stalls.
+(define_insn "*zero_extendqihi2_movzbl"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+ && reload_completed"
+ "movz{bl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; For the movzbw case strip only the clobber
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (!TARGET_ZERO_EXTEND_WITH_AND
+ || optimize_function_for_size_p (cfun))
+ && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))])
+
+;; When source and destination do not overlap, clear the destination
+;; first and then do the movb
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (TARGET_ZERO_EXTEND_WITH_AND
+ && optimize_function_for_speed_p (cfun))
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0) (const_int 0))
+ (set (strict_low_part (match_dup 2)) (match_dup 1))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;; The rest is handled by a single and.
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "zero_extendqisi2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*zero_extendqisi2_and"
+ [(set (match_operand:SI 0 "register_operand" "=r,?&q")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_movzbw_and"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
+ "#"
+ [(set_attr "type" "imovx,alu1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*zero_extendqisi2_movzbw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+ && reload_completed"
+ "movz{bl|x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; For the movzbl case strip only the clobber
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+ && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
+ [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))])
+
+;; When source and destination do not overlap, clear the destination
+;; first and then do the movb
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
+ && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
+ && !reg_overlap_mentioned_p (operands[0], operands[1])"
+ [(set (match_dup 0) (const_int 0))
+ (set (strict_low_part (match_dup 2)) (match_dup 1))]
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
+
+;; The rest is handled by a single and.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; %%% Kill me once multi-word ops are sane.
+(define_expand "zero_extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+{
+ if (!TARGET_64BIT)
+ {
+ emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "zero_extendsidi2_32"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2")
+ (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "@
+ #
+ #
+ #
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
+
+(define_insn "zero_extendsidi2_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2")
+ (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))]
+ "TARGET_64BIT"
+ "@
+ mov\t{%k1, %k0|%k0, %k1}
+ #
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}
+ %vmovd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+ (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (zero_extend:DI (match_dup 0)))]
+ "TARGET_64BIT"
+ [(set (match_dup 4) (const_int 0))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(set (match_dup 4) (const_int 0))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (zero_extend:DI (match_operand:SI 1 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed
+ && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (const_int 0))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_64BIT"
+ "movz{wl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_64BIT"
+ "movz{bl|x}\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+;; Sign extension instructions
+
+(define_expand "extendsidi2"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 ""))])]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_extendsidi2_rex64 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "*extendsidi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+ "!TARGET_64BIT"
+ "#")
+
+(define_insn "extendsidi2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=*a,r")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
+ "TARGET_64BIT"
+ "@
+ {cltq|cdqe}
+ movs{lq|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")
+ (set_attr "prefix_0f" "0")
+ (set_attr "modrm" "0,1")])
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_64BIT"
+ "movs{wq|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ "TARGET_64BIT"
+ "movs{bq|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "DI")])
+
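+;; Illustrative sketches of the sign-extension instructions used above
+;; (hypothetical operands; AT&T syntax):
+;;
+;;	cltq			; sign-extend %eax into %rax
+;;	movslq	m, %rax		; sign-extend SImode memory into DImode
+;;	movswq	m, %rax		; from HImode
+;;	movsbq	m, %rax		; from QImode
+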
+;; Extend-to-memory case when the source register dies.
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "(reload_completed
+ && dead_or_set_p (insn, operands[1])
+ && !reg_mentioned_p (operands[1], operands[0]))"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 4) (match_dup 1))]
+ "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+;; Extend-to-memory case when the source register does not die.
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+ emit_move_insn (operands[3], operands[1]);
+
+ /* Generate a cltd if possible and doing so is profitable. */
+ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ && true_regnum (operands[1]) == AX_REG
+ && true_regnum (operands[2]) == DX_REG)
+ {
+ emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31)));
+ }
+ else
+ {
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31)));
+ }
+ emit_move_insn (operands[4], operands[2]);
+ DONE;
+})
+
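+;; For the %eax/%edx case the split above reduces to a sketch like this
+;; (hypothetical DImode stack slot "m"):
+;;
+;;	movl	%eax, m		; low word
+;;	cltd			; broadcast the sign of %eax into %edx
+;;	movl	%edx, 4+m	; high word
+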
+;; Extend-to-register case.  Optimize the case where the source and
+;; destination registers match, and the cases where we can use cltd.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 2 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+ if (true_regnum (operands[3]) != true_regnum (operands[1]))
+ emit_move_insn (operands[3], operands[1]);
+
+ /* Generate a cltd if possible and doing so is profitable. */
+ if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+ && true_regnum (operands[3]) == AX_REG)
+ {
+ emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31)));
+ DONE;
+ }
+
+ if (true_regnum (operands[4]) != true_regnum (operands[1]))
+ emit_move_insn (operands[4], operands[1]);
+
+ emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31)));
+ DONE;
+})
+
+(define_insn "extendhisi2"
+ [(set (match_operand:SI 0 "register_operand" "=*a,r")
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
+ ""
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cwtl|cwde}";
+ default:
+ return "movs{wl|x}\t{%1,%0|%0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cwtl is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "*extendhisi2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=*a,r")
+ (zero_extend:DI
+ (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cwtl|cwde}";
+ default:
+ return "movs{wl|x}\t{%1,%k0|%k0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cwtl is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "extendqihi2"
+ [(set (match_operand:HI 0 "register_operand" "=*a,r")
+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
+ ""
+{
+ switch (get_attr_prefix_0f (insn))
+ {
+ case 0:
+ return "{cbtw|cbw}";
+ default:
+ return "movs{bw|x}\t{%1,%0|%0, %1}";
+ }
+}
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "HI")
+ (set (attr "prefix_0f")
+ ;; movsx is short decodable while cbtw is vector decoded.
+ (if_then_else (and (eq_attr "cpu" "!k6")
+ (eq_attr "alternative" "0"))
+ (const_string "0")
+ (const_string "1")))
+ (set (attr "modrm")
+ (if_then_else (eq_attr "prefix_0f" "0")
+ (const_string "0")
+ (const_string "1")))])
+
+(define_insn "extendqisi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+ ""
+ "movs{bl|x}\t{%1,%0|%0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+(define_insn "*extendqisi2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
+ "TARGET_64BIT"
+ "movs{bl|x}\t{%1,%k0|%k0, %1}"
+ [(set_attr "type" "imovx")
+ (set_attr "mode" "SI")])
+
+;; Conversions between float and double.
+
+;; These are all no-ops in the model used for the 80387. So just
+;; emit moves.
+
+;; %%% Kill these when the call expander knows how to work out a DFmode
+;; push earlier.
+(define_insn "*dummy_extendsfdf2"
+ [(set (match_operand:DF 0 "push_operand" "=<")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))]
+ "0"
+ "#")
+
+(define_split
+ [(set (match_operand:DF 0 "push_operand" "")
+ (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+ ""
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+ (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_insn "*dummy_extendsfxf2"
+ [(set (match_operand:XF 0 "push_operand" "=<")
+ (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))]
+ "0"
+ "#")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
+ ""
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+ [(set (match_operand:XF 0 "push_operand" "")
+ (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
+ ""
+ [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+ (set (mem:DF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+ "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "")
+ (float_extend:DF (match_operand:SF 1 "general_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+ && standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+ }
+})
+
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+ cvtss2sd:
+ unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
+ cvtps2pd xmm2,xmm1
+ We do the conversion post reload to avoid producing 128-bit spills,
+ which might lead to an ICE on 32-bit targets.  The sequence is unlikely
+ to combine anyway.  */
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_VECTOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 2)
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 3)
+ (parallel [(const_int 0) (const_int 1)]))))]
+{
+ operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+ operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+ /* Use movss for loading from memory, unpcklps reg, reg for registers.
+ Try to avoid a move when the unpacking can be done in the source. */
+ if (REG_P (operands[1]))
+ {
+ /* If it is unsafe to overwrite the upper half of the source, we
+ need to move to the destination and unpack there. */
+ if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+ || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+ && true_regnum (operands[0]) != true_regnum (operands[1]))
+ {
+ rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+ emit_move_insn (tmp, operands[1]);
+ }
+ else
+ operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
+ emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
+ }
+ else
+ emit_insn (gen_vec_setv4sf_0 (operands[3],
+ CONST0_RTX (V4SFmode), operands[1]));
+})
+
+(define_insn "*extendsfdf2_mixed"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
+ (float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ case 1:
+ return output_387_reg_move (insn, operands);
+
+ case 2:
+ return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,fmov,ssecvt")
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "SF,XF,DF")])
+
+(define_insn "*extendsfdf2_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "%vcvtss2sd\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "*extendsfdf2_i387"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF,XF")])
+
+(define_expand "extend<mode>xf2"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "")
+ (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))]
+ "TARGET_80387"
+{
+ /* ??? Needed for compress_float_constant since all fp constants
+ are LEGITIMATE_CONSTANT_P. */
+ if (GET_CODE (operands[1]) == CONST_DOUBLE)
+ {
+ if (standard_80387_constant_p (operands[1]) > 0)
+ {
+ operands[1] = simplify_const_unary_operation
+ (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
+ emit_move_insn_1 (operands[0], operands[1]);
+ DONE;
+ }
+ operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
+ }
+})
+
+(define_insn "*extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+ (float_extend:XF
+ (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
+ "TARGET_80387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>,XF")])
+
+;; %%% This seems like bad news.
+;; This cannot output into an f-reg because there is no way to be sure
+;; of truncating in that case. Otherwise this is just like a simple move
+;; insn. So we pretend we can output to a reg in order to get better
+;; register preferencing, but we really use a stack slot.
+
+;; Conversion from DFmode to SFmode.
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+ if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+ ;
+ else if (flag_unsafe_math_optimizations)
+ ;
+ else
+ {
+ int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+ rtx temp = assign_386_stack_local (SFmode, slot);
+ emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+ DONE;
+ }
+})
+
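+;; Without unsafe math optimizations, the i387 truncation above must round
+;; through memory: the 80387 computes in extended precision, and storing is
+;; the only way to obtain a correctly rounded SFmode result.  A sketch of
+;; the with-temp path (hypothetical stack slot "m"):
+;;
+;;	fstps	m		; the store itself rounds DFmode to SFmode
+;;	flds	m		; reload if the result is wanted in a register
+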
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+ cvtsd2ss:
+ unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
+ cvtpd2ps xmm2,xmm1
+ We do the conversion post reload to avoid producing 128-bit spills,
+ which might lead to an ICE on 32-bit targets.  The sequence is unlikely
+ to combine anyway.  */
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_VECTOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 2)
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_dup 4))
+ (match_dup 3)))]
+{
+ operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+ operands[3] = CONST0_RTX (V2SFmode);
+ operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+ /* Use movsd for loading from memory, unpcklpd for registers.
+ Try to avoid a move when the unpacking can be done in the source,
+ or when SSE3 movddup is available. */
+ if (REG_P (operands[1]))
+ {
+ if (!TARGET_SSE3
+ && true_regnum (operands[0]) != true_regnum (operands[1])
+ && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+ || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+ {
+ rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+ emit_move_insn (tmp, operands[1]);
+ operands[1] = tmp;
+ }
+ else if (!TARGET_SSE3)
+ operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+ emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+ }
+ else
+ emit_insn (gen_sse2_loadlpd (operands[4],
+ CONST0_RTX (V2DFmode), operands[1]));
+})
+
+(define_expand "truncdfsf2_with_temp"
+ [(parallel [(set (match_operand:SF 0 "" "")
+ (float_truncate:SF (match_operand:DF 1 "" "")))
+ (clobber (match_operand:SF 2 "" ""))])]
+ "")
+
+(define_insn "*truncdfsf_fast_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,xm")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+ case 1:
+ return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "fmov,ssecvt")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
+(define_insn "*truncdfsf_fast_sse"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH"
+ "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_fast_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=fm")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,Y2 ,?f,?x,?*r")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,Y2m,f ,f ,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))]
+ "TARGET_MIX_SSE_I387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+ case 1:
+ return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+
+ default:
+ return "#";
+ }
+}
+ [(set_attr "type" "fmov,ssecvt,multi,multi,multi")
+ (set_attr "unit" "*,*,i387,i387,i387")
+ (set_attr "prefix" "orig,maybe_vex,orig,orig,orig")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_i387"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+ (float_truncate:SF
+ (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+ "TARGET_80387"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return output_387_reg_move (insn, operands);
+
+ default:
+ return "#";
+ }
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf2_i387_1"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (float_truncate:SF
+ (match_operand:DF 1 "register_operand" "f")))]
+ "TARGET_80387
+ && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && !TARGET_MIX_SSE_I387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (float_truncate:SF
+ (match_operand:DF 1 "fp_register_operand" "")))
+ (clobber (match_operand 2 "" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+{
+ operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));
+})
+
+;; Conversion from XFmode to {SF,DF}mode
+
+(define_expand "truncxf<mode>2"
+ [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_dup 2))])]
+ "TARGET_80387"
+{
+ if (flag_unsafe_math_optimizations)
+ {
+ rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
+ if (reg != operands[0])
+ emit_move_insn (operands[0], reg);
+ DONE;
+ }
+ else
+ {
+ int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+ operands[2] = assign_386_stack_local (<MODE>mode, slot);
+ }
+})
+
+(define_insn "*truncxfsf2_mixed"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+ (float_truncate:SF
+ (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+ (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+ "TARGET_80387"
+{
+ gcc_assert (!which_alternative);
+ return output_387_reg_move (insn, operands);
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "SF")])
+
+(define_insn "*truncxfdf2_mixed"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?Y2,?*r")
+ (float_truncate:DF
+ (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+ (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))]
+ "TARGET_80387"
+{
+ gcc_assert (!which_alternative);
+ return output_387_reg_move (insn, operands);
+}
+ [(set_attr "type" "fmov,multi,multi,multi")
+ (set_attr "unit" "*,i387,i387,i387")
+ (set_attr "mode" "DF")])
+
+(define_insn "truncxf<mode>2_i387_noop"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387 && flag_unsafe_math_optimizations"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*truncxf<mode>2_i387"
+ [(set (match_operand:MODEF 0 "memory_operand" "=m")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "f")))]
+ "TARGET_80387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+ "TARGET_80387 && reload_completed"
+ [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:MODEF 0 "memory_operand" "")
+ (float_truncate:MODEF
+ (match_operand:XF 1 "register_operand" "")))
+ (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+ "TARGET_80387"
+ [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]
+ "")
+
+;; Signed conversion to DImode.
+
+(define_expand "fix_truncxfdi2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:XF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "fix_trunc<mode>di2"
+ [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
+{
+ if (TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
+ {
+ rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
+ emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ DONE;
+ }
+})
+
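+;; With SSE3's fisttp, the truncations above need no control-word
+;; switching.  A sketch (hypothetical memory operand "m"):
+;;
+;;	fisttpll m		; truncate ST(0) to DImode regardless of the
+;;				; current rounding-control bits
+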
+;; Signed conversion to SImode.
+
+(define_expand "fix_truncxfsi2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:XF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "fix_trunc<mode>si2"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (fix:SI (match_operand:MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (TARGET_FISTTP
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+ if (SSE_FLOAT_MODE_P (<MODE>mode))
+ {
+ rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+ emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
+ if (out != operands[0])
+ emit_move_insn (operands[0], out);
+ DONE;
+ }
+})
+
+;; Signed conversion to HImode.
+
+(define_expand "fix_trunc<mode>hi2"
+ [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (fix:HI (match_operand:X87MODEF 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_80387
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+ if (TARGET_FISTTP)
+ {
+ emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+;; Unsigned conversion to SImode.
+
+(define_expand "fixuns_trunc<mode>si2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unsigned_fix:SI
+ (match_operand:MODEF 1 "nonimmediate_operand" "")))
+ (use (match_dup 2))
+ (clobber (match_scratch:<ssevecmode> 3 ""))
+ (clobber (match_scratch:<ssevecmode> 4 ""))])]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+ enum machine_mode mode = <MODE>mode;
+ enum machine_mode vecmode = <ssevecmode>mode;
+ REAL_VALUE_TYPE TWO31r;
+ rtx two31;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ real_ldexp (&TWO31r, &dconst1, 31);
+ two31 = const_double_from_real_value (TWO31r, mode);
+ two31 = ix86_build_const_vector (mode, true, two31);
+ operands[2] = force_reg (vecmode, two31);
+})
+
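+;; SSE has no direct unsigned 32-bit conversion, so the split below builds
+;; one from the signed cvtt conversion and a 2^31 bias.  In outline (a
+;; sketch of the idea, not the exact emitted sequence):
+;;
+;;	if (x < 2^31)  result = (int) x;
+;;	else           result = (int) (x - 2^31) ^ 0x80000000;
+;;
+;; realized branchlessly with a compare mask, a masked subtract of the
+;; 2^31 vector constant (operand 2), cvttss2si/cvttsd2si, and an xor.
+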
+(define_insn_and_split "*fixuns_trunc<mode>_1"
+ [(set (match_operand:SI 0 "register_operand" "=&x,&x")
+ (unsigned_fix:SI
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
+ (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
+ (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
+ (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+ && optimize_function_for_speed_p (cfun)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_convert_uns_si_sse (operands);
+ DONE;
+})
+
+;; Unsigned conversion to HImode.
+;; Without these patterns, we'll try the unsigned SI conversion which
+;; is complex for SSE, rather than the signed SI conversion, which isn't.
+
+(define_expand "fixuns_trunc<mode>hi2"
+ [(set (match_dup 2)
+ (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "")))
+ (set (match_operand:HI 0 "nonimmediate_operand" "")
+ (subreg:HI (match_dup 2) 0))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+;; When SSE is available, it is always faster to use it!
+(define_insn "fix_trunc<mode>di_sse"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+ "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+ && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "%vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
+
+(define_insn "fix_trunc<mode>si_sse"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)
+ && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+ "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
+
+;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
+(define_peephole2
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (match_operand:MODEF 1 "memory_operand" ""))
+ (set (match_operand:SSEMODEI24 2 "register_operand" "")
+ (fix:SSEMODEI24 (match_dup 0)))]
+ "TARGET_SHORTEN_X87_SSE
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
+ "")
+
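+;; That is, instead of loading the value into a register and converting,
+;; convert directly from memory when the intermediate register dies
+;; (an illustrative DFmode instance):
+;;
+;;	movsd	m, %xmm0	;  \ replaced by
+;;	cvttsd2si %xmm0, %eax	;  /  cvttsd2si m, %eax
+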
+;; Avoid vector decoded forms of the instruction.
+(define_peephole2
+ [(match_scratch:DF 2 "Y2")
+ (set (match_operand:SSEMODEI24 0 "register_operand" "")
+ (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
+ "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(match_scratch:SF 2 "x")
+ (set (match_operand:SSEMODEI24 0 "register_operand" "")
+ (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
+ "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+ "")
+
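+;; Here the transformation goes the other way: on TARGET_AVOID_VECTOR_DECODE
+;; CPUs the memory form of cvtts[sd]2si decodes as a vector op, so a
+;; separate load plus a register-form convert is faster (illustrative):
+;;
+;;	cvttsd2si m, %eax	; vector decoded, so prefer:
+;;	movsd	m, %xmm1
+;;	cvttsd2si %xmm1, %eax
+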
+(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_FISTTP
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
+ else
+ {
+ operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0],
+ operands[1],
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp"
+ [(set (match_operand:X87MODEI 0 "memory_operand" "=m")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f")))
+ (clobber (match_scratch:XF 2 "=&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_FISTTP
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)"
+ "* return output_fix_trunc (insn, operands, 1);"
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_FISTTP
+ && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && TARGET_SSE_MATH)"
+ "#"
+ [(set_attr "type" "fisttp")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI 0 "register_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1)))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI 0 "memory_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1)))
+ (clobber (match_dup 3))])]
+ "")
+
+;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for a description
+;; of the machinery.  Please note the clobber of FLAGS_REG: in the i387
+;; control word calculation (inserted by LCM in the mode-switching pass),
+;; FLAGS_REG-clobbering insns may be used.  See the
+;; emit_i387_cw_initialization () function in i386.c.
+(define_insn_and_split "*fix_trunc<mode>_i387_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (fix:X87MODEI (match_operand 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && (TARGET_64BIT || <MODE>mode != DImode))
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_truncdi_i387"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (fix:DI (match_operand 1 "register_operand" "f")))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fix_truncdi_i387_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (fix:DI (match_operand 1 "register_operand" "f,f")))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (fix:DI (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (fix:DI (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (fix:DI (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])]
+ "")
+
+(define_insn "fix_trunc<mode>_i387"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fix_trunc<mode>_i387_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f")))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && !TARGET_FISTTP
+ && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ "")
+
+(define_insn "x86_fnstcw_1"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
+ "TARGET_80387"
+ "fnstcw\t%0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "HI")
+ (set_attr "unit" "i387")])
+
+(define_insn "x86_fldcw_1"
+ [(set (reg:HI FPCR_REG)
+ (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
+ "TARGET_80387"
+ "fldcw\t%0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "HI")
+ (set_attr "unit" "i387")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+
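+;; Together, x86_fnstcw_1 and x86_fldcw_1 implement the classic truncation
+;; control-word dance.  Roughly (the actual slots are set up by
+;; emit_i387_cw_initialization; names here are hypothetical):
+;;
+;;	fnstcw	cw		; save the current control word
+;;	movw	cw, %ax
+;;	orw	$0x0c00, %ax	; RC = round toward zero
+;;	movw	%ax, cw_trunc
+;;	fldcw	cw_trunc	; switch the rounding mode
+;;	fistpl	m		; truncating store
+;;	fldcw	cw		; restore the original control word
+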
+;; Conversion between fixed point and floating point.
+
+;; Even though we only accept memory inputs, the backend _really_
+;; wants to be able to do this between registers.
+
+(define_expand "floathi<mode>2"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
+ "")
+
+;; Pre-reload splitter to add a memory clobber to the pattern.
+(define_insn_and_split "*floathi<mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "register_operand" "")))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (float:X87MODEF (match_dup 1)))
+ (clobber (match_dup 2))])]
+ "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);")
+
+(define_insn "*floathi<mode>2_i387_with_temp"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_operand:HI 2 "memory_operand" "=m,m"))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
+ "#"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "<MODE>")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floathi<mode>2_i387"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+ (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
+ "fild%z1\t%1"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "register_operand" "")))
+ (clobber (match_operand:HI 2 "memory_operand" ""))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:HI 1 "memory_operand" "")))
+ (clobber (match_operand:HI 2 "memory_operand" ""))]
+ "TARGET_80387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && reload_completed"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+ "")
+
+(define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))]
+ "TARGET_80387
+ || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)"
+ "")
+
+;; Pre-reload splitter to add a memory clobber to the pattern.
+(define_insn_and_split "*float<SSEMODEI24:mode><X87MODEF:mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))]
+ "((TARGET_80387
+ && (!((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387))
+ || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+ && ((<SSEMODEI24:MODE>mode == SImode
+ && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS
+ && optimize_function_for_speed_p (cfun)
+ && flag_trapping_math)
+ || !(TARGET_INTER_UNIT_CONVERSIONS
+ || optimize_function_for_size_p (cfun)))))
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
+ (clobber (match_dup 2))])]
+{
+ operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
+
+ /* Avoid the store-forwarding (partial memory) stall penalty
+ by passing the DImode value through XMM registers.  */
+ if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && optimize_function_for_speed_p (cfun))
+ {
+ emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
+ (float:MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x")))
+ (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt")
+ (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<ssevecmode>")
+ (set_attr "unit" "*,i387,*,*,*")
+ (set_attr "athlon_decode" "*,*,double,direct,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))]
+ "TARGET_SSE2 && TARGET_MIX_SSE_I387
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "mode" "<MODE>,<ssevecmode>")
+ (set_attr "unit" "i387,*")
+ (set_attr "athlon_decode" "*,direct")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387"
+ "#"
+ [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "unit" "*,i387,*,*")
+ (set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && TARGET_INTER_UNIT_CONVERSIONS
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+ "")
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:MODEF (match_dup 2)))]
+ "")
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "@
+ fild%z1\t%1
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "fmov,sseicvt,sseicvt")
+ (set_attr "prefix" "orig,maybe_vex,maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "unit" "i387,*,*")
+ (set_attr "athlon_decode" "*,double,direct")
+ (set_attr "amdfam10_decode" "*,vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "@
+ fild%z1\t%1
+ %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "fmov,sseicvt")
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "athlon_decode" "*,direct")
+ (set_attr "amdfam10_decode" "*,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_sse_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,x")
+ (float:MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "r,m,!x")))
+ (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
+ (set_attr "athlon_decode" "double,direct,double")
+ (set_attr "amdfam10_decode" "vector,double,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_vector_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "m")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:SI 2 "memory_operand" ""))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ if (GET_CODE (op1) == SUBREG)
+ op1 = SUBREG_REG (op1);
+
+ if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+ {
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ }
+ /* We can ignore a possible trapping value in the
+ high part of the SSE register for non-trapping math. */
+ else if (SSE_REG_P (op1) && !flag_trapping_math)
+ operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+ else
+ {
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+ emit_move_insn (operands[2], operands[1]);
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[2]));
+ }
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "")))
+ (clobber (match_operand:SI 2 "memory_operand" ""))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(const_int 0)]
+{
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "register_operand" "")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ if (GET_CODE (op1) == SUBREG)
+ op1 = SUBREG_REG (op1);
+
+ if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+ {
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ }
+ /* We can ignore a possible trapping value in the
+ high part of the SSE register for non-trapping math. */
+ else if (SSE_REG_P (op1) && !flag_trapping_math)
+ operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+ else
+ gcc_unreachable ();
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SI 1 "memory_operand" "")))]
+ "TARGET_SSE2 && TARGET_SSE_MATH
+ && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(const_int 0)]
+{
+ operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+ <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+ emit_insn (gen_sse2_loadld (operands[4],
+ CONST0_RTX (V4SImode), operands[1]));
+ emit_insn
+ (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+ "#"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+ "")
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (float:MODEF
+ (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+ "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODEF:MODE>")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:MODEF (match_dup 2)))]
+ "")
+
+(define_split
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+ && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && reload_completed
+ && (SSE_REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && SSE_REG_P (operands[0])))"
+ [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+ "")
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF
+ (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))]
+ "TARGET_80387"
+ "@
+ fild%z1\t%1
+ #"
+ [(set_attr "type" "fmov,multi")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "unit" "*,i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+ (float:X87MODEF
+ (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+ "TARGET_80387"
+ "fild%z1\t%1"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "TARGET_80387
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+ (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+ "TARGET_80387
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+ "")
+
+;; Avoid the store-forwarding (partial memory) stall penalty
+;; by passing the DImode value through XMM registers.
+
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (float:X87MODEF
+ (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+ (clobber (match_scratch:V4SI 3 "=X,x"))
+ (clobber (match_scratch:V4SI 4 "=X,x"))
+ (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<X87MODEF:MODE>")
+ (set_attr "unit" "i387")
+ (set_attr "fp_int_src" "true")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+ (clobber (match_scratch:V4SI 3 ""))
+ (clobber (match_scratch:V4SI 4 ""))
+ (clobber (match_operand:DI 2 "memory_operand" ""))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+  /* The DImode value arrived in a pair of integer registers
+     (e.g. %edx:%eax).  Assemble the 64-bit DImode value in an xmm
+     register.  */
+ emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+ (clobber (match_scratch:V4SI 3 ""))
+ (clobber (match_scratch:V4SI 4 ""))
+ (clobber (match_operand:DI 2 "memory_operand" ""))]
+ "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+ && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && reload_completed
+ && FP_REG_P (operands[0])"
+ [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+ "")
+
+;; Avoid the store forwarding (partial memory) stall penalty by extending
+;; the SImode value to DImode through an XMM register instead of pushing
+;; two SImode values to the stack.  Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization, and note that fild can only
+;; load from memory.
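+;;
+;; Roughly (registers illustrative, "slot" the clobbered DImode stack
+;; temporary):
+;;
+;;	movd	%eax, %xmm0		; upper 32 bits become zero
+;;	movq	%xmm0, slot		; single 64-bit store
+;;	fildll	slot			; signed load of a value < 2^32,
+;;					; which equals the unsigned SImode value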
+
+(define_insn "*floatunssi<mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+ (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+ (clobber (match_scratch:SI 3 "=X,x"))]
+ "!TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE"
+ "#"
+ [(set_attr "type" "multi")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "register_operand" "")))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "!TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0)
+ (float:X87MODEF (match_dup 2)))]
+ "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+(define_split
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "memory_operand" "")))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "!TARGET_64BIT
+ && TARGET_80387 && TARGET_SSE
+ && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0)
+ (float:X87MODEF (match_dup 2)))]
+{
+ emit_move_insn (operands[3], operands[1]);
+ operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
+})
+
+(define_expand "floatunssi<mode>2"
+ [(parallel
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (unsigned_float:X87MODEF
+ (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (match_dup 2))
+ (clobber (match_scratch:SI 3 ""))])]
+ "!TARGET_64BIT
+ && ((TARGET_80387 && TARGET_SSE)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ {
+ ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+ DONE;
+ }
+ else
+ {
+ int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+ operands[2] = assign_386_stack_local (DImode, slot);
+ }
+})
+
+(define_expand "floatunsdisf2"
+ [(use (match_operand:SF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+ "TARGET_64BIT && TARGET_SSE_MATH"
+ "x86_emit_floatuns (operands); DONE;")
+
+(define_expand "floatunsdidf2"
+ [(use (match_operand:DF 0 "register_operand" ""))
+ (use (match_operand:DI 1 "nonimmediate_operand" ""))]
+ "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
+ && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+ if (TARGET_64BIT)
+ x86_emit_floatuns (operands);
+ else
+ ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
+ DONE;
+})
+
+;; Add instructions
+
+;; %%% splits for addditi3
+
+(define_expand "addti3"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;")
+
+(define_insn "*addti3_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
+ (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 4))
+ (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
+
+;; %%% splits for addsidi3
+; [(set (match_operand:DI 0 "nonimmediate_operand" "")
+; (plus:DI (match_operand:DI 1 "general_operand" "")
+; (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))]
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;")
+
+(define_insn "*adddi3_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "general_operand" "roiF,riF")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+ UNSPEC_ADD_CARRY))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 4))
+ (match_dup 5)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 3, &operands[0], &operands[3]);")
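+
+;; After this split, a 64-bit add on ia32 is the usual carry chain,
+;; e.g. (registers illustrative):
+;;
+;;	addl	%ecx, %eax		; low halves, sets CF
+;;	adcl	%ebx, %edx		; high halves plus carry
+;;
+;; The TImode split above expands the same way with add{q}/adc{q}.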
+
+(define_insn "adddi3_carry_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
+ (match_operand:DI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "adc{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi3_cc_rex64"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+ "add{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*<plusminus_insn><mode>3_cc_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plusminus:SWI
+ (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 1)))
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (plusminus:SWI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>3_cconly_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0")
+ (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
+ (match_dup 1)))
+ (clobber (match_scratch:SWI 0 "=<r>"))]
+ "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+ "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*sub<mode>3_cconly_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+ (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+ (match_dup 0)))]
+ ""
+ "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
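+
+;; A subtraction whose result is needed only for the flags is exactly
+;; what cmp computes, hence the cmp mnemonic in the pattern above.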
+
+(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (match_dup 1)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+ "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "addqi3_carry"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:QI 2 "general_operand" "qn,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, QImode, operands)"
+ "adc{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "QI")])
+
+(define_insn "addhi3_carry"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
+ (match_operand:HI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:HI 2 "general_operand" "rn,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, HImode, operands)"
+ "adc{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "HI")])
+
+(define_insn "addsi3_carry"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 1 "nonimmediate_operand" "%0,0"))
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "adc{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_carry_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "adc{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi3_cc"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+ "add{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "addqi3_cc"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qn,qm")]
+ UNSPEC_ADD_CARRY))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (plus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (PLUS, QImode, operands)"
+ "add{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")
+
+(define_insn "*lea_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 1 "no_seg_address_operand" "p"))]
+ "!TARGET_64BIT"
+ "lea{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_1_rex64"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))]
+ "TARGET_64BIT"
+ "lea{l}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))]
+ "TARGET_64BIT"
+ "lea{l}\t{%a1, %k0|%k0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lea_2_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (match_operand:DI 1 "no_seg_address_operand" "p"))]
+ "TARGET_64BIT"
+ "lea{q}\t{%a1, %0|%0, %a1}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "DI")])
+
+;; The lea patterns for non-Pmodes need to be matched by several
+;; insns, which are then converted to real lea instructions by splitters.
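+;;
+;; For instance, an HImode (plus (plus (mult idx 4) base) disp) is
+;; matched by *lea_general_3 below and, after reload, rewritten in
+;; Pmode so that it can be emitted as a single instruction, e.g.
+;;
+;;	leal	disp(%base,%idx,4), %dest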
+
+(define_insn_and_split "*lea_general_1"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "register_operand" "r"))
+ (match_operand 3 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[2])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "register_operand" "r"))
+ (match_operand:SI 3 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "nonmemory_operand" "ri")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && (GET_MODE (operands[0]) == GET_MODE (operands[3])
+ || GET_MODE (operands[3]) == VOIDmode)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]),
+ operands[3]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_2_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "nonmemory_operand" "ri"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3"
+ [(set (match_operand 0 "register_operand" "=r")
+ (plus (plus (mult (match_operand 1 "index_register_operand" "l")
+ (match_operand 2 "const248_operand" "i"))
+ (match_operand 3 "register_operand" "r"))
+ (match_operand 4 "immediate_operand" "i")))]
+ "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && GET_MODE (operands[0]) == GET_MODE (operands[3])"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx pat;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+ pat = gen_rtx_PLUS (Pmode,
+ gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1],
+ operands[2]),
+ operands[3]),
+ operands[4]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*lea_general_3_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (plus:SI (plus:SI (mult:SI
+ (match_operand:SI 1 "index_register_operand" "l")
+ (match_operand:SI 2 "const248_operand" "n"))
+ (match_operand:SI 3 "register_operand" "r"))
+ (match_operand:SI 4 "immediate_operand" "i"))))]
+ "TARGET_64BIT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))
+ (match_dup 4)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[3] = gen_lowpart (Pmode, operands[3]);
+ operands[4] = gen_lowpart (Pmode, operands[4]);
+}
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")])
+
+(define_insn "*adddi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{q}\t{%a2, %0|%0, %a2}";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
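+      /* (x86 sign-extends 8-bit immediates, so "add $128" needs a
+	 32-bit immediate while "sub $-128" fits in one byte; plain
+	 negative constants are rewritten purely for readability.)  */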
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ ; Current assemblers are broken and do not allow @GOTOFF in
+	       ; anything but a memory context.
+ (match_operand:DI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+;; Convert the add insn to the lea pattern to avoid a flags dependency.
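+;; For example, when the destination differs from the source,
+;;	movq	%rbx, %rax
+;;	addq	%rcx, %rax
+;; can become the single, flags-free
+;;	leaq	(%rbx,%rcx), %rax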
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (plus:DI (match_dup 1)
+ (match_dup 2)))]
+ "")
+
+(define_insn "*adddi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "rme,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+ (plus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, DImode, operands)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ???? We ought to handle the 32-bit case there too
+	 - do we need a new constraint? */
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme"))
+ (match_operand:DI 1 "x86_64_general_operand" "%0")))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      /* ???? We ought to handle the 32-bit case there too
+	 - do we need a new constraint? */
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept a general immediate, because in
+; case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x8000000000000000, since that
+; value overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion
+; is valid only for comparisons that do not depend on it.
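+;
+; For example, when the compared register is otherwise dead,
+;	cmpq	$1, %rax
+; can be rewritten as
+;	decq	%rax
+; which produces the same ZF/SF/OF results while clobbering %rax.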
+(define_insn "*adddi_4_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+ (clobber (match_scratch:DI 0 "=rm"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGCmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128))
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+ return "sub{q}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi_5_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rme"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+
+(define_insn "*addsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:SI 2 "general_operand" "g,ri,li")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{l}\t{%a2, %0|%0, %a2}";
+
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ ; Current assemblers are broken and do not allow @GOTOFF in
+	       ; anything but a memory context.
+ (match_operand:SI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; Convert the add insn to the lea pattern to avoid a flags dependency.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (plus (match_operand 1 "register_operand" "")
+ (match_operand 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(const_int 0)]
+{
+ rtx pat;
+ /* In -fPIC mode the constructs like (const (unspec [symbol_ref]))
+ may confuse gen_lowpart. */
+ if (GET_MODE (operands[0]) != Pmode)
+ {
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+ }
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
+
+;; It may seem that a nonimmediate operand is the proper one for operand 1.
+;; The addsi_1 pattern allows a nonimmediate operand at that place and
+;; we take care in ix86_binary_operator_ok not to allow two memory
+;; operands, so proper swapping will be done in reload.  This allows
+;; patterns constructed from addsi_1 to match.
+(define_insn "addsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
+ (match_operand:SI 2 "general_operand" "g,li"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{l}\t{%a2, %k0|%k0, %a2}";
+
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ ; Current assemblers are broken and do not allow @GOTOFF in
+	       ; anything but a memory context.
+ (match_operand:SI 2 "pic_symbolic_operand" "")
+ (const_string "lea")
+ (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ ]
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; Convert the add insn to the lea pattern to avoid a flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_lowpart (Pmode, operands[2]);
+})
+
+(define_insn "*addsi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "g,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; See the comment above addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; See the comment above addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*addsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2.  This pattern
+; is matched then.  We can't accept a general immediate, because in
+; case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x80000000, since that value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion
+; is valid only for comparisons that do not depend on it.
+(define_insn "*addsi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:SI 0 "=rm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128)))
+ return "sub{l}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*addsi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+      anything but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+(define_expand "addhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;")
+
+;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable the incb %ah
+;; style optimizations enabled by define-splits.  These are not important
+;; for PII, and are in fact harmful because of partial register stalls.
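+;;
+;; (A partial register stall occurs when e.g. "incb %ah" writes part of
+;; %eax and a later instruction reads the full register, forcing the
+;; core to merge the stale and new halves.)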
+
+(define_insn "*addhi_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
+ (match_operand:HI 2 "general_operand" "rn,rm,ln")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "2")
+ (const_string "lea")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu"))))
+ (set_attr "mode" "HI,HI,SI")])
+
+(define_insn "*addhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rn,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*addhi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (plus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*addhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (neg:HI (match_operand:HI 2 "general_operand" "rmn"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+; See comments above addsi_4 for details.
+(define_insn "*addhi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:HI 0 "=rm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xffff) != 0x8000"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128)))
+ return "sub{w}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+
+(define_insn "*addhi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rmn"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{w}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{w}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{w}\t{%2, %0|%0, %2}";
+ }
+ return "add{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:HI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "HI")])
+
+(define_expand "addqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*addqi_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ int widen = (which_alternative == 2);
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "3")
+ (const_string "lea")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu"))))
+ (set_attr "mode" "QI,QI,SI,SI")])
+
+(define_insn "*addqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ int widen = (which_alternative == 2);
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ if (widen)
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ if (widen)
+ return "add{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*addqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (plus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qnm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[1] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[1] == constm1_rtx);
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. */
+ if (CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) < 0)
+ {
+ operands[1] = GEN_INT (-INTVAL (operands[1]));
+ return "sub{b}\t{%1, %0|%0, %1}";
+ }
+ return "add{b}\t{%1, %0|%0, %1}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 1 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu1")))
+ (set (attr "memory")
+ (if_then_else (match_operand 1 "memory_operand" "")
+ (const_string "load")
+ (const_string "none")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (plus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_3"
+ [(set (reg FLAGS_REG)
+ (compare (neg:QI (match_operand:QI 2 "general_operand" "qmn"))
+ (match_operand:QI 1 "nonimmediate_operand" "%0")))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCZmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+; See comments above addsi_4 for details.
+(define_insn "*addqi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:QI 0 "=qm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xff) != 0x80"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255))
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{b}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+	(if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+
+(define_insn "*addqi_5"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qmn"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%0";
+ }
+
+ default:
+ /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */
+ if (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) < 0)
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{b}\t{%2, %0|%0, %2}";
+ }
+ return "add{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+
+(define_insn "addqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" "Qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%h0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%h0";
+ }
+
+ default:
+ return "add{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "nonmemory_operand" "Qn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{b}\t%h0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx
+ || (CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) == 255));
+ return "dec{b}\t%h0";
+ }
+
+ default:
+ return "add{b}\t{%2, %h0|%h0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:QI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*addqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (plus:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "%0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "add{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "addxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (plus:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "add<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (plus:MODEF (match_operand:MODEF 1 "register_operand" "")
+ (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "")
+
+;; Subtract instructions
+
+;; %%% splits for subditi3
+
+(define_expand "subti3"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;")
+
+(define_insn "*subti3_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+ (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0")
+ (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+ (match_operand:TI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (minus:DI (match_dup 4)
+ (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
+
+;; %%% splits for subsidi3
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;")
+
+(define_insn "*subdi3_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "general_operand" "roiF,riF")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed"
+ [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+ (parallel [(set (match_dup 3)
+ (minus:SI (match_dup 4)
+ (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 5))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 3, &operands[0], &operands[3]);")
+
+(define_insn "subdi3_carry_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sbb{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*subdi_3_rex63"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (minus:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, DImode, operands)"
+ "sub{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "subqi3_carry"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
+ (match_operand:QI 2 "general_operand" "qn,qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sbb{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "QI")])
+
+(define_insn "subhi3_carry"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
+ (match_operand:HI 2 "general_operand" "rn,rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sbb{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "HI")])
+
+(define_insn "subsi3_carry"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 2 "general_operand" "ri,rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sbb{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_insn "subsi3_carry_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
+ (match_operand:SI 2 "general_operand" "g")))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sbb{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;")
+
+(define_insn "*subsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*subsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "general_operand" "g")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sub{l}\t{%2, %1|%1, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_expand "subhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;")
+
+(define_insn "*subhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "rn,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*subhi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "rn,rm"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*subhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:HI 2 "general_operand" "rn,rm")))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (minus:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, HImode, operands)"
+ "sub{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "subqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;")
+
+(define_insn "*subqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qn,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (minus:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "sub{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qn,qm"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (minus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*subqi_3"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "general_operand" "qn,qm")))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+ (minus:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCmode)
+ && ix86_binary_operator_ok (MINUS, QImode, operands)"
+ "sub{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "subxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (minus:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "sub<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (minus:MODEF (match_operand:MODEF 1 "register_operand" "")
+ (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "")
+
+;; Multiply instructions
+
+(define_expand "muldi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+;; On AMDFAM10
+;; IMUL reg64, reg64, imm8 Direct
+;; IMUL reg64, mem64, imm8 VectorPath
+;; IMUL reg64, reg64, imm32 Direct
+;; IMUL reg64, mem64, imm32 VectorPath
+;; IMUL reg64, reg64 Direct
+;; IMUL reg64, mem64 Direct
+
+(define_insn "*muldi3_1_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:DI 2 "x86_64_general_operand" "K,e,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{q}\t{%2, %1, %0|%0, %1, %2}
+ imul{q}\t{%2, %1, %0|%0, %1, %2}
+ imul{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "DI")])
+
+(define_expand "mulsi3"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+;; On AMDFAM10
+;; IMUL reg32, reg32, imm8 Direct
+;; IMUL reg32, mem32, imm8 VectorPath
+;; IMUL reg32, reg32, imm32 Direct
+;; IMUL reg32, mem32, imm32 VectorPath
+;; IMUL reg32, reg32 Direct
+;; IMUL reg32, mem32 Direct
+
+(define_insn "*mulsi3_1"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SI 2 "general_operand" "K,i,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{l}\t{%2, %1, %0|%0, %1, %2}
+ imul{l}\t{%2, %1, %0|%0, %1, %2}
+ imul{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*mulsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:SI 2 "general_operand" "K,i,mr"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ imul{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1")
+ (const_string "vector")
+ (and (eq_attr "alternative" "2")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "SI")])
+
+(define_expand "mulhi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "")
+
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 VectorPath
+;; IMUL reg16, mem16, imm8 VectorPath
+;; IMUL reg16, reg16, imm16 VectorPath
+;; IMUL reg16, mem16, imm16 VectorPath
+;; IMUL reg16, reg16 Direct
+;; IMUL reg16, mem16 Direct
+(define_insn "*mulhi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=r,r,r")
+ (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
+ (match_operand:HI 2 "general_operand" "K,n,mr")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ imul{w}\t{%2, %1, %0|%0, %1, %2}
+ imul{w}\t{%2, %1, %0|%0, %1, %2}
+ imul{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imul")
+ (set_attr "prefix_0f" "0,0,1")
+ (set (attr "athlon_decode")
+ (cond [(eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (eq_attr "alternative" "1,2")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(eq_attr "alternative" "0,1")
+ (const_string "vector")]
+ (const_string "direct")))
+ (set_attr "mode" "HI")])
+
+(define_expand "mulqi3"
+ [(parallel [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "")
+
+;; On AMDFAM10
+;; MUL reg8 Direct
+;; MUL mem8 Direct
+
+(define_insn "*mulqi3_1"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "mode" "QI")])
+
+(define_expand "umulqihi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (zero_extend:HI
+ (match_operand:QI 1 "nonimmediate_operand" ""))
+ (zero_extend:HI
+ (match_operand:QI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "")
+
+(define_insn "*umulqihi3_1"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "mode" "QI")])
+
+(define_expand "mulqihi3"
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))
+ (sign_extend:HI (match_operand:QI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_QIMODE_MATH"
+ "")
+
+(define_insn "*mulqihi3_insn"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "imul{b}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
+ (set_attr "mode" "QI")])
+
+(define_expand "umulditi3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*umulditi3_insn"
+ [(set (match_operand:TI 0 "register_operand" "=A")
+ (mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "DI")])
+
+;; We can't use this pattern in 64-bit mode, since it places the result in
+;; two separate 32-bit registers.
+(define_expand "umulsidi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT"
+ "")
+
+(define_insn "*umulsidi3_insn"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "SI")])
+
+(define_expand "mulditi3"
+ [(parallel [(set (match_operand:TI 0 "register_operand" "")
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (sign_extend:TI
+ (match_operand:DI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*mulditi3_insn"
+ [(set (match_operand:TI 0 "register_operand" "=A")
+ (mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "imul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "DI")])
+
+(define_expand "mulsidi3"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT"
+ "")
+
+(define_insn "*mulsidi3_insn"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "imul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "SI")])
+
+(define_expand "umuldi3_highpart"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (zero_extend:TI
+ (match_operand:DI 2 "register_operand" "")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*umuldi3_highpart_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (zero_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" "%a"))
+ (zero_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "DI")])
+
+(define_expand "umulsi3_highpart"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (zero_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*umulsi3_highpart_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "SI")])
+
+(define_insn "*umulsi3_highpart_zext"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (zero_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32)))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "mul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set_attr "length_immediate" "0")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "SI")])
+
+(define_expand "smuldi3_highpart"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" ""))
+ (sign_extend:TI
+ (match_operand:DI 2 "register_operand" "")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*smuldi3_highpart_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (truncate:DI
+ (lshiftrt:TI
+ (mult:TI (sign_extend:TI
+ (match_operand:DI 1 "nonimmediate_operand" "%a"))
+ (sign_extend:TI
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (const_int 64))))
+ (clobber (match_scratch:DI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "imul{q}\t%2"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "DI")])
+
+(define_expand "smulsi3_highpart"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (sign_extend:DI
+ (match_operand:SI 2 "register_operand" "")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+(define_insn "*smulsi3_highpart_insn"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "imul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "SI")])
+
+(define_insn "*smulsi3_highpart_zext"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 1 "nonimmediate_operand" "%a"))
+ (sign_extend:DI
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (const_int 32)))))
+ (clobber (match_scratch:SI 3 "=1"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "imul{l}\t%2"
+ [(set_attr "type" "imul")
+ (set (attr "athlon_decode")
+ (if_then_else (eq_attr "cpu" "athlon")
+ (const_string "vector")
+ (const_string "double")))
+ (set_attr "amdfam10_decode" "double")
+ (set_attr "mode" "SI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "mulxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (mult:MODEF (match_operand:MODEF 1 "register_operand" "")
+ (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "")
+
+;; SSE5 scalar multiply/add instructions are defined in sse.md.
+
+
+;; Divide instructions
+
+(define_insn "divqi3"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (div:QI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "idiv{b}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "QI")])
+
+(define_insn "udivqi3"
+ [(set (match_operand:QI 0 "register_operand" "=a")
+ (udiv:QI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonimmediate_operand" "qm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_QIMODE_MATH"
+ "div{b}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "QI")])
+
+;; The patterns that match these are at the end of this file.
+
+(define_expand "divxf3"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (div:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))]
+ "TARGET_80387"
+ "")
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "register_operand" "")
+ (div:DF (match_operand:DF 1 "register_operand" "")
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+ "")
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (div:SF (match_operand:SF 1 "register_operand" "")
+ (match_operand:SF 2 "nonimmediate_operand" "")))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], SFmode);
+ DONE;
+ }
+})
+
+;; Remainder instructions.
+
+(define_expand "divmoddi4"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "register_operand" "")
+ (mod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "")
+
+;; Allow the parameter to arrive in either eax or edx, to avoid extra moves.
+;; Penalize the eax case slightly because it results in worse scheduling
+;; of the code.
+(define_insn "*divmoddi4_nocltd_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=&a,?a")
+ (div:DI (match_operand:DI 2 "register_operand" "1,0")
+ (match_operand:DI 3 "nonimmediate_operand" "rm,rm")))
+ (set (match_operand:DI 1 "register_operand" "=&d,&d")
+ (mod:DI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmoddi4_cltd_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (div:DI (match_operand:DI 2 "register_operand" "a")
+ (match_operand:DI 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 1 "register_operand" "=&d")
+ (mod:DI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmoddi_noext_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (div:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 3 "register_operand" "=d")
+ (mod:DI (match_dup 1) (match_dup 2)))
+ (use (match_operand:DI 4 "register_operand" "3"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "idiv{q}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (div:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "register_operand" "")
+ (mod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel [(set (match_dup 3)
+ (ashiftrt:DI (match_dup 4) (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:DI (reg:DI 0) (match_dup 2)))
+ (set (match_dup 3)
+ (mod:DI (reg:DI 0) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* Avoid use of cltd in favor of a mov+shift. */
+ if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun))
+ {
+ if (true_regnum (operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_move_insn (operands[3], operands[1]);
+ operands[4] = operands[3];
+ }
+ else
+ {
+ gcc_assert (!true_regnum (operands[1]));
+ operands[4] = operands[1];
+ }
+})
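+;; When cltd is avoided, the split above materializes the sign extension of
+;; the dividend into the remainder register with a mov+shift pair, roughly
+;; (registers illustrative):
+;;   mov{q}  %rax, %rdx
+;;   sar{q}  $63, %rdx        ; %rdx = 0 or -1, the sign of the dividend
+;;   idiv{q} %rcx             ; quotient in %rax, remainder in %rdx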
+
+
+(define_expand "divmodsi4"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "")
+
+;; Allow the parameter to arrive in either eax or edx, to avoid extra moves.
+;; Penalize the eax case slightly because it results in worse scheduling
+;; of the code.
+(define_insn "*divmodsi4_nocltd"
+ [(set (match_operand:SI 0 "register_operand" "=&a,?a")
+ (div:SI (match_operand:SI 2 "register_operand" "1,0")
+ (match_operand:SI 3 "nonimmediate_operand" "rm,rm")))
+ (set (match_operand:SI 1 "register_operand" "=&d,&d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmodsi4_cltd"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_operand:SI 2 "register_operand" "a")
+ (match_operand:SI 3 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 1 "register_operand" "=&d")
+ (mod:SI (match_dup 2) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))]
+ "optimize_function_for_size_p (cfun) || TARGET_USE_CLTD"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_insn "*divmodsi_noext"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (div:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (use (match_operand:SI 4 "register_operand" "3"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "idiv{l}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 3)
+ (ashiftrt:SI (match_dup 4) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0)
+ (div:SI (reg:SI 0) (match_dup 2)))
+ (set (match_dup 3)
+ (mod:SI (reg:SI 0) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* Avoid use of cltd in favor of a mov+shift. */
+ if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun))
+ {
+ if (true_regnum (operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+ else
+ emit_move_insn (operands[3], operands[1]);
+ operands[4] = operands[3];
+ }
+ else
+ {
+ gcc_assert (!true_regnum (operands[1]));
+ operands[4] = operands[1];
+ }
+})
+
+;; %%% Split me.
+(define_insn "divmodhi4"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (div:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:HI 3 "register_operand" "=&d")
+ (mod:HI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_HIMODE_MATH"
+ "cwtd\;idiv{w}\t%2"
+ [(set_attr "type" "multi")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "SI")])
+
+(define_insn "udivmoddi4"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (udiv:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 3 "register_operand" "=&d")
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "xor{q}\t%3, %3\;div{q}\t%2"
+ [(set_attr "type" "multi")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "DI")])
+
+(define_insn "*udivmoddi4_noext"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (udiv:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:DI 3 "register_operand" "=d")
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "div{q}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (udiv:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonimmediate_operand" "")))
+ (set (match_operand:DI 3 "register_operand" "")
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(set (match_dup 3) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:DI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (umod:DI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn "udivmodsi4"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 3 "register_operand" "=&d")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "xor{l}\t%3, %3\;div{l}\t%2"
+ [(set_attr "type" "multi")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "SI")])
+
+(define_insn "*udivmodsi4_noext"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (udiv:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "div{l}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonimmediate_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(set (match_dup 3) (const_int 0))
+ (parallel [(set (match_dup 0)
+ (udiv:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 3)
+ (umod:SI (match_dup 1) (match_dup 2)))
+ (use (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "udivmodhi4"
+ [(set (match_dup 4) (const_int 0))
+ (parallel [(set (match_operand:HI 0 "register_operand" "")
+ (udiv:HI (match_operand:HI 1 "register_operand" "")
+ (match_operand:HI 2 "nonimmediate_operand" "")))
+ (set (match_operand:HI 3 "register_operand" "")
+ (umod:HI (match_dup 1) (match_dup 2)))
+ (use (match_dup 4))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_HIMODE_MATH"
+ "operands[4] = gen_reg_rtx (HImode);")
+
+(define_insn "*udivmodhi_noext"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (udiv:HI (match_operand:HI 1 "register_operand" "0")
+ (match_operand:HI 2 "nonimmediate_operand" "rm")))
+ (set (match_operand:HI 3 "register_operand" "=d")
+ (umod:HI (match_dup 1) (match_dup 2)))
+ (use (match_operand:HI 4 "register_operand" "3"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "div{w}\t%2"
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "HI")])
+
+;; We cannot use div/idiv for double division, because it raises the
+;; "division by zero" exception on overflow, which is not the silent
+;; truncation that the RTL truncate promises.  Because true
+;; (non-truncating) double division is never generated, we can't create
+;; this insn anyway.
+;
+;(define_insn ""
+; [(set (match_operand:SI 0 "register_operand" "=a")
+; (truncate:SI
+; (udiv:DI (match_operand:DI 1 "register_operand" "A")
+; (zero_extend:DI
+; (match_operand:SI 2 "nonimmediate_operand" "rm")))))
+; (set (match_operand:SI 3 "register_operand" "=d")
+; (truncate:SI
+; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2)))))
+; (clobber (reg:CC FLAGS_REG))]
+; ""
+; "div{l}\t{%2, %0|%0, %2}"
+; [(set_attr "type" "idiv")])
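+;; As a concrete example of the overflow problem: truncating
+;; 0x100000000 / 1 to SImode should simply keep the low 32 bits, but
+;; div{l} would fault, since the quotient does not fit in %eax and the CPU
+;; raises the same #DE exception it uses for division by zero.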
+
+;;- Logical AND instructions
+
+;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al.
+;; Note that this excludes ah.
+
+(define_insn "*testdi_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm")
+ (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re"))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ test{l}\t{%k1, %k0|%k0, %k1}
+ test{l}\t{%k1, %k0|%k0, %k1}
+ test{q}\t{%1, %0|%0, %1}
+ test{q}\t{%1, %0|%0, %1}
+ test{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,0,1,1")
+ (set_attr "mode" "SI,SI,DI,DI,DI")
+ (set_attr "pent_pair" "uv,np,uv,np,uv")])
+
+(define_insn "testsi_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm")
+ (match_operand:SI 1 "general_operand" "i,i,ri"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "test{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testsi_ccno_1"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:SI (match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:SI 1 "nonmemory_operand" ""))
+ (const_int 0)))]
+ ""
+ "")
+
+(define_insn "*testhi_1"
+ [(set (reg FLAGS_REG)
+ (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm")
+ (match_operand:HI 1 "general_operand" "n,n,rn"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "test{w}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "HI")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ccz_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "")
+ (match_operand:QI 1 "nonmemory_operand" ""))
+ (const_int 0)))]
+ ""
+ "")
+
+(define_insn "*testqi_1_maybe_si"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r")
+ (match_operand:QI 1 "general_operand" "n,n,qn,n"))
+ (const_int 0)))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_match_ccmode (insn,
+ CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)"
+{
+ if (which_alternative == 3)
+ {
+ if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0)
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ return "test{l}\t{%1, %k0|%k0, %1}";
+ }
+ return "test{b}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1,1")
+ (set_attr "mode" "QI,QI,QI,SI")
+ (set_attr "pent_pair" "uv,np,uv,np")])
+
+(define_insn "*testqi_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm")
+ (match_operand:QI 1 "general_operand" "n,n,qn"))
+ (const_int 0)))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "modrm" "0,1,1")
+ (set_attr "mode" "QI")
+ (set_attr "pent_pair" "uv,np,uv")])
+
+(define_expand "testqi_ext_ccno_0"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 1 "const_int_operand" ""))
+ (const_int 0)))]
+ ""
+ "")
+
+(define_insn "*testqi_ext_0"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 1 "const_int_operand" "n"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")
+ (set_attr "length_immediate" "1")
+ (set_attr "pent_pair" "np")])
+
+(define_insn "*testqi_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 1 "general_operand" "Qm")))
+ (const_int 0)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 1 "register_operand" "Q")))
+ (const_int 0)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%1, %h0|%h0, %1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+(define_insn "*testqi_ext_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 0 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8)))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "test{b}\t{%h1, %h0|%h0, %h1}"
+ [(set_attr "type" "test")
+ (set_attr "mode" "QI")])
+
+;; Combine likes to form bit extractions for some tests. Humor it.
+(define_insn "*testqi_ext_3"
+ [(set (reg FLAGS_REG)
+ (compare (zero_extract:SI
+ (match_operand 0 "nonimmediate_operand" "rm")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[2]) >= 0
+ && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+ && (GET_MODE (operands[0]) == SImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == QImode)"
+ "#")
+
+(define_insn "*testqi_ext_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (zero_extract:DI
+ (match_operand 0 "nonimmediate_operand" "rm")
+ (match_operand:DI 1 "const_int_operand" "")
+ (match_operand:DI 2 "const_int_operand" ""))
+ (const_int 0)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && INTVAL (operands[1]) > 0
+ && INTVAL (operands[2]) >= 0
+ /* Ensure that the resulting mask is a zero- or sign-extended 32-bit operand. */
+ && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
+ || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64
+ && INTVAL (operands[1]) > 32))
+ && (GET_MODE (operands[0]) == SImode
+ || GET_MODE (operands[0]) == DImode
+ || GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == QImode)"
+ "#")
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(zero_extract
+ (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_int 0)]))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))]
+{
+ rtx val = operands[2];
+ HOST_WIDE_INT len = INTVAL (operands[3]);
+ HOST_WIDE_INT pos = INTVAL (operands[4]);
+ HOST_WIDE_INT mask;
+ enum machine_mode mode, submode;
+
+ mode = GET_MODE (val);
+ if (MEM_P (val))
+ {
+ /* ??? Combine likes to put non-volatile mem extractions in QImode
+ no matter the size of the test. So find a mode that works. */
+ if (! MEM_VOLATILE_P (val))
+ {
+ mode = smallest_mode_for_size (pos + len, MODE_INT);
+ val = adjust_address (val, mode, 0);
+ }
+ }
+ else if (GET_CODE (val) == SUBREG
+ && (submode = GET_MODE (SUBREG_REG (val)),
+ GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode))
+ && pos + len <= GET_MODE_BITSIZE (submode))
+ {
+ /* Narrow a paradoxical subreg to prevent partial register stalls. */
+ mode = submode;
+ val = SUBREG_REG (val);
+ }
+ else if (mode == HImode && pos + len <= 8)
+ {
+ /* Small HImode tests can be converted to QImode. */
+ mode = QImode;
+ val = gen_lowpart (QImode, val);
+ }
+
+ if (len == HOST_BITS_PER_WIDE_INT)
+ mask = -1;
+ else
+ mask = ((HOST_WIDE_INT)1 << len) - 1;
+ mask <<= pos;
+
+ operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode));
+})
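+;; For instance, a test of (zero_extract ... (const_int 3) (const_int 5))
+;; rebuilds the mask as ((HOST_WIDE_INT) 1 << 3) - 1 = 7 shifted left by
+;; 5, i.e. 0xe0, so the extraction collapses to an AND with $0xe0.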
+
+;; Convert HImode/SImode test instructions with an immediate to QImode ones.
+;; i386 has no encoding of test with an 8-bit sign-extended immediate, so
+;; this is a relatively important trick.
+;; Do the conversion only post-reload to avoid limiting the register class
+;; to QI regs.
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand 2 "register_operand" "")
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "reload_completed
+ && QI_REG_P (operands[2])
+ && GET_MODE (operands[2]) != QImode
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[3]) & ~(255 << 8)))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[3]) & ~(127 << 8))))"
+ [(set (match_dup 0)
+ (match_op_dup 1
+ [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8))
+ (match_dup 3))
+ (const_int 0)]))]
+ "operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);")
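+;; E.g. a 5-byte "test{l} $0x4000, %eax" becomes the 3-byte
+;; "test{b} $0x40, %ah": the split keeps bits 8..15 of the mask and shifts
+;; the immediate right by 8 to line up with the high-byte register.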
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "reload_completed
+ && GET_MODE (operands[2]) != QImode
+ && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2]))
+ && ((ix86_match_ccmode (insn, CCZmode)
+ && !(INTVAL (operands[3]) & ~255))
+ || (ix86_match_ccmode (insn, CCNOmode)
+ && !(INTVAL (operands[3]) & ~127)))"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+ (const_int 0)]))]
+ "operands[2] = gen_lowpart (QImode, operands[2]);
+ operands[3] = gen_lowpart (QImode, operands[3]);")
+
+
+;; %%% This used to optimize known byte-wide AND operations to memory,
+;; and sometimes to QImode registers.  If this is considered useful,
+;; it should be done with splitters.
+
+(define_expand "anddi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (AND, DImode, operands); DONE;")
+
+(define_insn "*anddi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
+ (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ {
+ enum machine_mode mode;
+
+ gcc_assert (CONST_INT_P (operands[2]));
+ if (INTVAL (operands[2]) == 0xff)
+ mode = QImode;
+ else
+ {
+ gcc_assert (INTVAL (operands[2]) == 0xffff);
+ mode = HImode;
+ }
+
+ operands[1] = gen_lowpart (mode, operands[1]);
+ if (mode == QImode)
+ return "movz{bq|x}\t{%1,%0|%0, %1}";
+ else
+ return "movz{wq|x}\t{%1,%0|%0, %1}";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (get_attr_mode (insn) == MODE_SI)
+ return "and{l}\t{%k2, %k0|%k0, %k2}";
+ else
+ return "and{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,*,0")
+ (set_attr "mode" "SI,DI,DI,DI")])
+
+(define_insn "*anddi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm")
+ (and:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, DImode, operands)"
+ "@
+ and{l}\t{%k2, %k0|%k0, %k2}
+ and{q}\t{%2, %0|%0, %2}
+ and{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI,DI,DI")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (AND, SImode, operands); DONE;")
+
+(define_insn "*andsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm")
+ (match_operand:SI 2 "general_operand" "ri,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ {
+ enum machine_mode mode;
+
+ gcc_assert (CONST_INT_P (operands[2]));
+ if (INTVAL (operands[2]) == 0xff)
+ mode = QImode;
+ else
+ {
+ gcc_assert (INTVAL (operands[2]) == 0xffff);
+ mode = HImode;
+ }
+
+ operands[1] = gen_lowpart (mode, operands[1]);
+ if (mode == QImode)
+ return "movz{bl|x}\t{%1,%0|%0, %1}";
+ else
+ return "movz{wl|x}\t{%1,%0|%0, %1}";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "and{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,0")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_dup 0)
+ (const_int -65536)))
+ (clobber (reg:CC FLAGS_REG))]
+ "optimize_function_for_size_p (cfun) || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)"
+ [(set (strict_low_part (match_dup 1)) (const_int 0))]
+ "operands[1] = gen_lowpart (HImode, operands[0]);")
+
+(define_split
+ [(set (match_operand 0 "ext_register_operand" "")
+ (and (match_dup 0)
+ (const_int -256)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+ [(set (strict_low_part (match_dup 1)) (const_int 0))]
+ "operands[1] = gen_lowpart (QImode, operands[0]);")
+
+(define_split
+ [(set (match_operand 0 "ext_register_operand" "")
+ (and (match_dup 0)
+ (const_int -65281)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+ [(parallel [(set (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_dup 0)
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);")
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*andsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*andsi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "g,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (and:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*andsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, SImode, operands)"
+ "and{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_expand "andhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (AND, HImode, operands); DONE;")
+
+(define_insn "*andhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
+ (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
+ (match_operand:HI 2 "general_operand" "rn,rm,L")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOVX:
+ gcc_assert (CONST_INT_P (operands[2]));
+ gcc_assert (INTVAL (operands[2]) == 0xff);
+ return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+
+ return "and{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "type" "alu,alu,imovx")
+ (set_attr "length_immediate" "*,*,0")
+ (set_attr "mode" "HI,HI,SI")])
+
+(define_insn "*andhi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (and:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, HImode, operands)"
+ "and{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "andqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (AND, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*andqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
+ (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (AND, QImode, operands)"
+ "@
+ and{b}\t{%2, %0|%0, %2}
+ and{b}\t{%2, %0|%0, %2}
+ and{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "and{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_2_maybe_si"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn,n"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
+ (and:QI (match_dup 1) (match_dup 2)))]
+ "ix86_binary_operator_ok (AND, QImode, operands)
+ && ix86_match_ccmode (insn,
+ CONST_INT_P (operands[2])
+ && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)"
+{
+ if (which_alternative == 2)
+ {
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff);
+ return "and{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "and{b}\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*andqi_2"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (and:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (AND, QImode, operands)"
+ "and{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (and:QI
+ (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "nonimmediate_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "and{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+;; ??? A bug in recog prevents it from recognizing a const_int as an
+;; operand to zero_extend in andqi_ext_1. It was checking explicitly
+;; for a QImode operand, which of course failed.
+
+(define_insn "andqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+;; Generated by the peephole that translates test to and.  This shows up
+;; often in fp comparisons.
+
+(define_insn "*andqi_ext_0_cc"
+ [(set (reg FLAGS_REG)
+ (compare
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "and{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*andqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "%0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "and{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+;; Convert wide AND instructions with an immediate operand to shorter
+;; QImode equivalents when possible.
+;; Don't do the splitting with memory operands, since it introduces a risk
+;; of memory mismatch stalls.  We may want to do the splitting when
+;; optimizing for size, but that can (should?) be handled by generic code
+;; instead.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(~INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (and:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
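+;; E.g. a mask of 0xffff7fff (all bits set except bit 15) can be applied
+;; as "andb $0x7f, %ah" instead of a full 32-bit "andl" (illustrative).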
+
+;; Since AND can be encoded with a sign-extended immediate, this is only
+;; profitable when the 7th bit is not set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (and (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(~INTVAL (operands[2]) & ~255)
+ && !(INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (and:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
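+;; E.g. the mask -129 (0xffffff7f) does not fit in a sign-extended imm8,
+;; so "andb $0x7f, %al" is shorter than the 32-bit "andl" form
+;; (illustrative).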
+
+;; Logical inclusive OR instructions
+
+;; %%% This used to optimize known byte-wide or operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "iordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (ior:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (IOR, DImode, operands); DONE;")
+
+(define_insn "*iordi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rme")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_binary_operator_ok (IOR, DImode, operands)"
+ "or{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*iordi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+ (ior:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, DImode, operands)"
+ "or{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*iordi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, DImode, operands)"
+ "or{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (IOR, SImode, operands); DONE;")
+
+(define_insn "*iorsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,g")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+(define_insn "*iorsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_1_zext_imm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_2"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "g,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (ior:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? The special case for an immediate operand is missing - it is tricky.
+(define_insn "*iorsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_2_zext_imm"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand 2 "x86_64_zext_immediate_operand" "Z"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, SImode, operands)"
+ "or{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*iorsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "or{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_expand "iorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (ior:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (IOR, HImode, operands); DONE;")
+
+(define_insn "*iorhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
+ (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (IOR, HImode, operands)"
+ "or{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*iorhi_2"
+ [(set (reg FLAGS_REG)
+ (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (ior:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, HImode, operands)"
+ "or{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*iorhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rmn"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "or{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "iorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ior:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (IOR, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*iorqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+ (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (IOR, QImode, operands)"
+ "@
+ or{b}\t{%2, %0|%0, %2}
+ or{b}\t{%2, %0|%0, %2}
+ or{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*iorqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
+ (ior:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qmn,qn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "or{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_2"
+ [(set (reg FLAGS_REG)
+ (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (ior:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (IOR, QImode, operands)"
+ "or{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (ior:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "or{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_3"
+ [(set (reg FLAGS_REG)
+ (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qmn"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "or{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "or{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*iorqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (ior:SI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "ior{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ior (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (ior:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since OR can be encoded with a sign-extended immediate, this is only
+;; profitable when the 7th bit is set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ior (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (ior:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
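+;; E.g. "orl $0x80, %eax" needs a full 32-bit immediate (a sign-extended
+;; imm8 of 0x80 would set the upper bits), while "orb $0x80, %al" has
+;; the same effect with a 1-byte immediate (illustrative).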
+
+;; Logical XOR instructions
+
+;; %%% This used to optimize known byte-wide xor operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+(define_expand "xordi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (xor:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "x86_64_general_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (XOR, DImode, operands); DONE;")
+
+(define_insn "*xordi_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "re,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_binary_operator_ok (XOR, DImode, operands)"
+ "xor{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*xordi_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem,re"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm")
+ (xor:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, DImode, operands)"
+ "xor{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_insn "*xordi_3_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rem"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, DImode, operands)"
+ "xor{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "DI")])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (XOR, SImode, operands); DONE;")
+
+(define_insn "*xorsi_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "ri,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? Add a special case for immediates.
+(define_insn "*xorsi_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_1_zext_imm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+ (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_2"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SI 2 "general_operand" "g,ri"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
+ (xor:SI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+;; See the comment for addsi_1_zext for why we use nonimmediate_operand.
+;; ??? The special case for an immediate operand is missing - it is tricky.
+(define_insn "*xorsi_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_2_zext_imm"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand 2 "x86_64_zext_immediate_operand" "Z"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (xor:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, SImode, operands)"
+ "xor{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_insn "*xorsi_3"
+ [(set (reg FLAGS_REG)
+ (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+ (match_operand:SI 2 "general_operand" "g"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "xor{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "SI")])
+
+(define_expand "xorhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (xor:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (XOR, HImode, operands); DONE;")
+
+(define_insn "*xorhi_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
+ (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, HImode, operands)"
+ "xor{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*xorhi_2"
+ [(set (reg FLAGS_REG)
+ (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:HI 2 "general_operand" "rmn,rn"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
+ (xor:HI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, HImode, operands)"
+ "xor{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_insn "*xorhi_3"
+ [(set (reg FLAGS_REG)
+ (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
+ (match_operand:HI 2 "general_operand" "rmn"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "xor{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "HI")])
+
+(define_expand "xorqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (XOR, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+(define_insn "*xorqi_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (XOR, QImode, operands)"
+ "@
+ xor{b}\t{%2, %0|%0, %2}
+ xor{b}\t{%2, %0|%0, %2}
+ xor{l}\t{%k2, %k0|%k0, %k2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI,QI,SI")])
+
+(define_insn "*xorqi_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+ (xor:QI (match_dup 0)
+ (match_operand:QI 1 "general_operand" "qn,qmn")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "xor{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_0"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 2 "const_int_operand" "n")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand:QI 2 "general_operand" "Qm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_1_rex64"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extend:SI
+ (match_operand 2 "ext_register_operand" "Q"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_ext_2"
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+ (const_int 8)
+ (const_int 8))))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
+ "xor{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "length_immediate" "0")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
+ (match_operand:QI 2 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
+ (xor:QI (match_dup 1) (match_dup 2)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_binary_operator_ok (XOR, QImode, operands)"
+ "xor{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_2_slp"
+ [(set (reg FLAGS_REG)
+ (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+ (match_operand:QI 1 "general_operand" "qmn,qn"))
+ (const_int 0)))
+ (set (strict_low_part (match_dup 0))
+ (xor:QI (match_dup 0) (match_dup 1)))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "xor{b}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_2"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
+ (match_operand:QI 2 "general_operand" "qmn"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "xor{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" "qmn"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+ (match_dup 2)))]
+ "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_insn "*xorqi_cc_ext_1_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "0")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "nonmemory_operand" "Qn"))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+ (match_dup 2)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ "xor{b}\t{%2, %h0|%h0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
+(define_expand "xorqi_cc_ext_1"
+ [(parallel [
+ (set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (xor:SI
+ (zero_extract:SI
+ (match_operand 1 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:QI 2 "general_operand" ""))
+ (const_int 0)))
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (xor:SI
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))
+ (match_dup 2)))])]
+ ""
+ "")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (xor (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(INTVAL (operands[2]) & ~(255 << 8))
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+ (xor:SI (zero_extract:SI (match_dup 1)
+ (const_int 8) (const_int 8))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);")
+
+;; Since XOR can be encoded with a sign-extended immediate, this is only
+;; profitable when the 7th bit is set.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (xor (match_operand 1 "general_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && ANY_QI_REG_P (operands[0])
+ && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(INTVAL (operands[2]) & ~255)
+ && (INTVAL (operands[2]) & 128)
+ && GET_MODE (operands[0]) != QImode"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (xor:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ operands[2] = gen_lowpart (QImode, operands[2]);")
+
+;; Negation instructions
+
+(define_expand "negti2"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_unary_operator (NEG, TImode, operands); DONE;")
+
+(define_insn "*negti2_1"
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=ro")
+ (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && ix86_unary_operator_ok (NEG, TImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:TI 0 "nonimmediate_operand" "")
+ (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed"
+ [(parallel
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:DI (match_dup 1)) (const_int 0)))
+ (set (match_dup 0) (neg:DI (match_dup 1)))])
+ (parallel
+ [(set (match_dup 2)
+ (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 2)
+ (neg:DI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_ti (&operands[0], 2, &operands[0], &operands[2]);")
+
+(define_expand "negdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
+ ""
+ "ix86_expand_unary_operator (NEG, DImode, operands); DONE;")
+
+(define_insn "*negdi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=ro")
+ (neg:DI (match_operand:DI 1 "general_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT
+ && ix86_unary_operator_ok (NEG, DImode, operands)"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (neg:DI (match_operand:DI 1 "general_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && reload_completed"
+ [(parallel
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:SI (match_dup 1)) (const_int 0)))
+ (set (match_dup 0) (neg:SI (match_dup 1)))])
+ (parallel
+ [(set (match_dup 2)
+ (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
+ (match_dup 3))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 2)
+ (neg:SI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 2, &operands[0], &operands[2]);");
+
+(define_insn "*negdi2_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+ "neg{q}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "DI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
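+;; E.g. after "negl %eax" the carry and sign flags reflect 0 - x, not
+;; x - 0; only ZF agrees with a plain compare against zero (x == 0 iff
+;; -x == 0), hence the CCZmode patterns below (illustrative note).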
+
+(define_insn "*negdi2_cmpz_rex64"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (neg:DI (match_dup 1)))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+ "neg{q}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "DI")])
+
+
+(define_expand "negsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+ "ix86_expand_unary_operator (NEG, SImode, operands); DONE;")
+
+(define_insn "*negsi2_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; Combine is quite creative about this pattern.
+(define_insn "*negsi2_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0")
+ (const_int 32)))
+ (const_int 32)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
+
+(define_insn "*negsi2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (neg:SI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_insn "*negsi2_cmpz_zext"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (lshiftrt:DI
+ (neg:DI (ashift:DI
+ (match_operand:DI 1 "register_operand" "0")
+ (const_int 32)))
+ (const_int 32))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
+ (const_int 32)))
+ (const_int 32)))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+ "neg{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_expand "neghi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_unary_operator (NEG, HImode, operands); DONE;")
+
+(define_insn "*neghi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, HImode, operands)"
+ "neg{w}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "HI")])
+
+(define_insn "*neghi2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (neg:HI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, HImode, operands)"
+ "neg{w}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "HI")])
+
+(define_expand "negqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_unary_operator (NEG, QImode, operands); DONE;")
+
+(define_insn "*negqi2_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_unary_operator_ok (NEG, QImode, operands)"
+ "neg{b}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI")])
+
+(define_insn "*negqi2_cmpz"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (neg:QI (match_dup 1)))]
+ "ix86_unary_operator_ok (NEG, QImode, operands)"
+ "neg{b}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI")])
+
+;; Changing the sign of FP values is also doable using the integer unit.
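+;; E.g. with an SFmode value held in an integer register (illustrative;
+;; this is what the splitters below generate):
+;;   xorl $0x80000000, %eax   ; neg: flip the sign bit
+;;   andl $0x7fffffff, %eax   ; abs: clear the sign bit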
+
+(define_expand "<code><mode>2"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))]
+ "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_insn "*absneg<mode>2_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+ (match_operator:MODEF 3 "absneg_operator"
+ [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
+ (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)"
+ "#")
+
+(define_insn "*absneg<mode>2_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r")
+ (match_operator:MODEF 3 "absneg_operator"
+ [(match_operand:MODEF 1 "register_operand" "0 ,x,0")]))
+ (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X"))
+ (clobber (reg:CC FLAGS_REG))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "#")
+
+(define_insn "*absneg<mode>2_i387"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+ (match_operator:X87MODEF 3 "absneg_operator"
+ [(match_operand:X87MODEF 1 "register_operand" "0,0")]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "#")
+
+(define_expand "<code>tf2"
+ [(set (match_operand:TF 0 "register_operand" "")
+ (absneg:TF (match_operand:TF 1 "register_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
+
+(define_insn "*absnegtf2_sse"
+ [(set (match_operand:TF 0 "register_operand" "=x,x")
+ (match_operator:TF 3 "absneg_operator"
+ [(match_operand:TF 1 "register_operand" "0,x")]))
+ (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_SSE2"
+ "#")
+
+;; Splitters for fp abs and neg.
+
+(define_split
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operator 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "absneg_operator"
+ [(match_operand 1 "register_operand" "")]))
+ (use (match_operand 2 "nonimmediate_operand" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed && SSE_REG_P (operands[0])"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum machine_mode vmode = GET_MODE (operands[2]);
+ rtx tmp;
+
+ operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0);
+ operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0);
+ if (operands_match_p (operands[0], operands[2]))
+ {
+ tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+ if (GET_CODE (operands[3]) == ABS)
+ tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
+ else
+ tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
+ operands[3] = tmp;
+})
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand:V4SF 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ if (TARGET_64BIT)
+ {
+ tmp = gen_lowpart (DImode, operands[0]);
+ tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
+ operands[0] = tmp;
+
+ if (GET_CODE (operands[1]) == ABS)
+ tmp = const0_rtx;
+ else
+ tmp = gen_rtx_NOT (DImode, tmp);
+ }
+ else
+ {
+ operands[0] = gen_highpart (SImode, operands[0]);
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = gen_int_mode (0x7fffffff, SImode);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = gen_int_mode (0x80000000, SImode);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ }
+ operands[1] = tmp;
+})
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ rtx tmp;
+ operands[0] = gen_rtx_REG (SImode,
+ true_regnum (operands[0])
+ + (TARGET_64BIT ? 1 : 2));
+ if (GET_CODE (operands[1]) == ABS)
+ {
+ tmp = GEN_INT (0x7fff);
+ tmp = gen_rtx_AND (SImode, operands[0], tmp);
+ }
+ else
+ {
+ tmp = GEN_INT (0x8000);
+ tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+ }
+ operands[1] = tmp;
+})
+
+;; Conditionalize these after reload.  If they match before reload, we
+;; lose the clobber and the ability to use integer instructions.
+
+(define_insn "*<code><mode>2_1"
+ [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+ (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
+ "TARGET_80387
+ && (reload_completed
+ || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+ "f<absnegprefix>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code>extendsfdf2"
+ [(set (match_operand:DF 0 "register_operand" "=f")
+ (absneg:DF (float_extend:DF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+ "f<absnegprefix>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "DF")])
+
+(define_insn "*<code>extendsfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (absneg:XF (float_extend:XF
+ (match_operand:SF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "f<absnegprefix>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+(define_insn "*<code>extenddfxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (absneg:XF (float_extend:XF
+ (match_operand:DF 1 "register_operand" "0"))))]
+ "TARGET_80387"
+ "f<absnegprefix>"
+ [(set_attr "type" "fsgn")
+ (set_attr "mode" "XF")])
+
+;; Copysign instructions
+
+(define_mode_iterator CSGNMODE [SF DF TF])
+(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")])
+
+(define_expand "copysign<mode>3"
+ [(match_operand:CSGNMODE 0 "register_operand" "")
+ (match_operand:CSGNMODE 1 "nonmemory_operand" "")
+ (match_operand:CSGNMODE 2 "register_operand" "")]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+{
+ ix86_expand_copysign (operands);
+ DONE;
+})
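+;; copysign<mode>3 sets operand 0 to the magnitude of operand 1 combined
+;; with the sign of operand 2; conceptually, for SFmode (illustrative):
+;;   result = (op1 & 0x7fffffff) | (op2 & 0x80000000)
+;; ix86_expand_copysign essentially arranges this with SSE and/andn/or
+;; mask operations.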
+
+(define_insn_and_split "copysign<mode>3_const"
+ [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
+ (unspec:CSGNMODE
+ [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
+ (match_operand:CSGNMODE 2 "register_operand" "0")
+ (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
+ UNSPEC_COPYSIGN))]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_copysign_const (operands);
+ DONE;
+})
+
+(define_insn "copysign<mode>3_var"
+ [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
+ (unspec:CSGNMODE
+ [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x")
+ (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x")
+ (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
+ (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ "#")
+
+(define_split
+ [(set (match_operand:CSGNMODE 0 "register_operand" "")
+ (unspec:CSGNMODE
+ [(match_operand:CSGNMODE 2 "register_operand" "")
+ (match_operand:CSGNMODE 3 "register_operand" "")
+ (match_operand:<CSGNVMODE> 4 "" "")
+ (match_operand:<CSGNVMODE> 5 "" "")]
+ UNSPEC_COPYSIGN))
+ (clobber (match_scratch:<CSGNVMODE> 1 ""))]
+ "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || (TARGET_SSE2 && (<MODE>mode == TFmode)))
+ && reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_copysign_var (operands);
+ DONE;
+})
+
+;; One's complement instructions
+
+(define_expand "one_cmpldi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (not:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_unary_operator (NOT, DImode, operands); DONE;")
+
+(define_insn "*one_cmpldi2_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, operands)"
+ "not{q}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "DI")])
+
+(define_insn "*one_cmpldi2_2_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (not:DI (match_dup 1)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, DImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:DI (match_operand:DI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:DI 1 "nonimmediate_operand" "")
+ (not:DI (match_dup 3)))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2
+ [(xor:DI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:DI (match_dup 3) (const_int -1)))])]
+ "")
+
+(define_expand "one_cmplsi2"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ ""
+ "ix86_expand_unary_operator (NOT, SImode, operands); DONE;")
+
+(define_insn "*one_cmplsi2_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))]
+ "ix86_unary_operator_ok (NOT, SImode, operands)"
+ "not{l}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (not:SI (match_operand:SI 1 "register_operand" "0"))))]
+ "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "not{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "SI")])
+
+(define_insn "*one_cmplsi2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (not:SI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:SI (match_operand:SI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:SI 1 "nonimmediate_operand" "")
+ (not:SI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:SI (match_dup 3) (const_int -1)))])]
+ "")
+
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_2_zext"
+ [(set (reg FLAGS_REG)
+ (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (not:SI (match_dup 1))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, SImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "SI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:SI (match_operand:SI 3 "register_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:DI 1 "register_operand" "")
+ (zero_extend:DI (not:SI (match_dup 3))))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]
+ "")
+
+(define_expand "one_cmplhi2"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_unary_operator (NOT, HImode, operands); DONE;")
+
+(define_insn "*one_cmplhi2_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))]
+ "ix86_unary_operator_ok (NOT, HImode, operands)"
+ "not{w}\t%0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "HI")])
+
+(define_insn "*one_cmplhi2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (not:HI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, HImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "HI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:HI (match_operand:HI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:HI 1 "nonimmediate_operand" "")
+ (not:HI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:HI (match_dup 3) (const_int -1)))])]
+ "")
+
+;; %%% Potential partial reg stall on alternative 1. What to do?
+(define_expand "one_cmplqi2"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_unary_operator (NOT, QImode, operands); DONE;")
+
+(define_insn "*one_cmplqi2_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))]
+ "ix86_unary_operator_ok (NOT, QImode, operands)"
+ "@
+ not{b}\t%0
+ not{l}\t%k0"
+ [(set_attr "type" "negnot")
+ (set_attr "mode" "QI,SI")])
+
+(define_insn "*one_cmplqi2_2"
+ [(set (reg FLAGS_REG)
+ (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (not:QI (match_dup 1)))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && ix86_unary_operator_ok (NOT, QImode, operands)"
+ "#"
+ [(set_attr "type" "alu1")
+ (set_attr "mode" "QI")])
+
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(not:QI (match_operand:QI 3 "nonimmediate_operand" ""))
+ (const_int 0)]))
+ (set (match_operand:QI 1 "nonimmediate_operand" "")
+ (not:QI (match_dup 3)))]
+ "ix86_match_ccmode (insn, CCNOmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (xor:QI (match_dup 3) (const_int -1)))])]
+ "")
+
+;; Arithmetic shift instructions
+
+;; DImode shifts are implemented using the i386 "shift double" opcode,
+;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
+;; is variable, then the count is in %cl and the "imm" operand is dropped
+;; from the assembler input.
+;;
+;; This instruction shifts the target reg/mem as usual, but instead of
+;; shifting in zeros, bits are shifted in from the `reg' operand: a
+;; left shift double takes them from the high-order bits of `reg',
+;; while a right shift double takes them from its low-order bits. So
+;; if %eax holds the bytes "1234" and %edx holds "5678", then
+;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
+;;
+;; Since sh[lr]d does not change the `reg' operand, that shift is done
+;; separately, so every double-word shift emits a shift-double/normal-
+;; shift pair. And since sh[lr]d shifts by at most 31 bits while we
+;; wish to support a 63-bit shift, each shift whose count is in a reg
+;; expands to a pair of shifts, a branch, a shift by 32, and a label.
+;;
+;; If the shift count is a constant, we need never emit more than one
+;; shift pair, instead using moves and sign extension for counts greater
+;; than 31.
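+;;
+;; Illustratively (a sketch; real registers are chosen by the
+;; allocator), a variable 64-bit left shift on a 32-bit target, with
+;; the low word in %eax, the high word in %edx and the count in %cl,
+;; expands roughly to:
+;;
+;;	shldl	%cl, %eax, %edx	; fill high word from low word's top bits
+;;	sall	%cl, %eax	; shift low word
+;;	testb	$32, %cl	; was the count 32 or more?
+;;	je	1f
+;;	movl	%eax, %edx	; yes: the low word becomes the high word
+;;	xorl	%eax, %eax	;      and the low word becomes zero
+;; 1: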
+
+(define_expand "ashlti3"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (ASHIFT, TImode, operands); DONE;")
+
+;; This pattern must be defined before *ashlti3_1 to prevent
+;; combine pass from converting sse2_ashlti3 to *ashlti3_1.
+
+(define_insn "*avx_ashlti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (ashift:TI (match_operand:TI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_ashlti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (ashift:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "pslldq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*ashlti3_1"
+ [(set (match_operand:TI 0 "register_operand" "=&r,r")
+ (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "n,0")
+ (match_operand:QI 2 "nonmemory_operand" "Oc,Oc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_peephole2
+ [(match_scratch:DI 3 "r")
+ (parallel [(set (match_operand:TI 0 "register_operand" "")
+ (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "TARGET_64BIT"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, operands[3], TImode); DONE;")
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;")
+
+(define_insn "x86_64_shld"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (ashift:DI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Jc"))
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:QI (const_int 64) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "shld{q}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+
+(define_expand "x86_64_shift_adj_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+ (const_int 64))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:DI 1 "register_operand" "")
+ (match_dup 0)))
+ (set (match_dup 1)
+ (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:DI 3 "register_operand" "r")
+ (match_dup 1)))]
+ "TARGET_64BIT"
+ "")
+
+(define_expand "x86_64_shift_adj_2"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ "TARGET_64BIT"
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ ix86_expand_clear (operands[1]);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
+(define_expand "ashldi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;")
+
+(define_insn "*ashldi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cJ,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "add{q}\t%0, %0";
+
+ case TYPE_LEA:
+ gcc_assert (CONST_INT_P (operands[2]));
+ gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3);
+ operands[1] = gen_rtx_MULT (DImode, operands[1],
+ GEN_INT (1 << INTVAL (operands[2])));
+ return "lea{q}\t{%a1, %0|%0, %a1}";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "index_register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0)
+ (mult:DI (match_dup 1)
+ (match_dup 2)))]
+ "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);")
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
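+;;
+;; For illustration (a sketch): given source like "if (x <<= 3) ...",
+;; this pattern lets the shift and the compare be fused into
+;;	salq	$3, %rax
+;;	jne	.Ltaken
+;; instead of emitting a separate test instruction.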
+(define_insn "*ashldi3_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_63_operand" "J"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashift:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{q}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_63_operand" "J"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{q}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{q}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{q}\t%0";
+ else
+ return "sal{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=&r,r")
+ (ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc,Jc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, operands[3], DImode); DONE;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
+
+(define_insn "x86_shld"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (ashift:SI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Ic"))
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")
+ (set_attr "pent_pair" "np")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+
+(define_expand "x86_shift_adj_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+ (const_int 32))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:SI 1 "register_operand" "")
+ (match_dup 0)))
+ (set (match_dup 1)
+ (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_operand:SI 3 "register_operand" "r")
+ (match_dup 1)))]
+ "TARGET_CMOVE"
+ "")
+
+(define_expand "x86_shift_adj_2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ ix86_expand_clear (operands[1]);
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;")
+
+(define_insn "*ashlsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ return "add{l}\t%0, %0";
+
+ case TYPE_LEA:
+ return "#";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "index_register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4"
+ [(const_int 0)]
+{
+ rtx pat;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (GET_MODE_SIZE (mode) < 4)
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ if (mode != Pmode)
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+
+ pat = gen_rtx_MULT (Pmode, operands[1], operands[2]);
+ if (Pmode != SImode)
+ pat = gen_rtx_SUBREG (SImode, pat, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+ DONE;
+})
+
+;; The rare case of shifting %rsp (which cannot serve as an lea index,
+;; so the split above does not apply) is handled by generating a move
+;; and a shift.
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(const_int 0)]
+{
+ rtx pat, clob;
+ emit_move_insn (operands[0], operands[1]);
+ pat = gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_ASHIFT (GET_MODE (operands[0]),
+ operands[0], operands[2]));
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob)));
+ DONE;
+})
+
+(define_insn "*ashlsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t%k0, %k0";
+
+ case TYPE_LEA:
+ return "#";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%k0";
+ else
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+;; Convert lea to the lea pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (ashift (match_operand 1 "register_operand" "")
+ (match_operand:QI 2 "const_int_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && reload_completed
+ && true_regnum (operands[0]) != true_regnum (operands[1])"
+ [(set (match_dup 0) (zero_extend:DI
+ (subreg:SI (mult:SI (match_dup 1)
+ (match_dup 2)) 0)))]
+{
+ operands[1] = gen_lowpart (Pmode, operands[1]);
+ operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode);
+})
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlsi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashift:SI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%0";
+ else
+ return "sal{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{l}\t%k0, %k0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{l}\t%k0";
+ else
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "SI")])
+
+(define_expand "ashlhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;")
+
+(define_insn "*ashlhi3_1_lea"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI,SI")])
+
+(define_insn "*ashlhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlhi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashift:HI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashlhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{w}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{w}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{w}\t%0";
+ else
+ return "sal{w}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "HI")])
+
+(define_expand "ashlqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;")
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+
+(define_insn "*ashlqi3_1_lea"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_LEA:
+ return "#";
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+ return "add{l}\t%k0, %k0";
+ else
+ return "add{b}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ }
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t%0";
+ else
+ return "sal{b}\t%0";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "2")
+ (const_string "lea")
+ (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI,SI,SI")])
+
+(define_insn "*ashlqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_PARTIAL_REG_STALL
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+ return "add{l}\t%k0, %k0";
+ else
+ return "add{b}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%b2, %k0|%k0, %b2}";
+ else
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ }
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t%0";
+ else
+ return "sal{b}\t%0";
+ }
+ else
+ {
+ if (get_attr_mode (insn) == MODE_SI)
+ return "sal{l}\t{%2, %k0|%k0, %2}";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI,SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashlqi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashift:QI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{b}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{b}\t%0";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashlqi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "(optimize_function_for_size_p (cfun)
+ || !TARGET_PARTIAL_FLAG_REG_STALL
+ || (operands[2] == const1_rtx
+ && (TARGET_SHIFT1
+ || TARGET_DOUBLE_WITH_ADD)))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ gcc_assert (operands[2] == const1_rtx);
+ return "add{b}\t%0, %0";
+
+ default:
+ if (REG_P (operands[2]))
+ return "sal{b}\t{%b2, %0|%0, %b2}";
+ else if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "sal{b}\t%0";
+ else
+ return "sal{b}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+ (const_int 0))
+ (match_operand 0 "register_operand" ""))
+ (match_operand 2 "const1_operand" ""))
+ (const_string "alu")
+ ]
+ (const_string "ishift")))
+ (set_attr "mode" "QI")])
+
+;; See comment above `ashldi3' about how this works.
+
+(define_expand "ashrti3"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (ASHIFTRT, TImode, operands); DONE;")
+
+(define_insn "*ashrti3_1"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "Oc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_peephole2
+ [(match_scratch:DI 3 "r")
+ (parallel [(set (match_operand:TI 0 "register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "TARGET_64BIT"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, operands[3], TImode); DONE;")
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;")
+
+(define_insn "x86_64_shrd"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+ (ior:DI (ashiftrt:DI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Jc"))
+ (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (minus:QI (const_int 64) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+
+(define_expand "ashrdi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
+
+(define_expand "x86_64_shift_adj_3"
+ [(use (match_operand:DI 0 "register_operand" ""))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63)));
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
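+;; An arithmetic right shift by 63 replicates the sign bit into every
+;; bit position, which is exactly what cqto/cqo computes when it
+;; sign-extends %rax into %rdx; hence the first alternative below.
+;; (The SImode patterns further down use {cltd|cdq} analogously.)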
+(define_insn "ashrdi3_63_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:DI 2 "const_int_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && INTVAL (operands[2]) == 63
+ && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "@
+ {cqto|cqo}
+ sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrdi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "J,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "@
+ sar{q}\t{%2, %0|%0, %2}
+ sar{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrdi3_one_bit_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrdi3_one_bit_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrdi3_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_63_operand" "J"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_63_operand" "J"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+ "sar{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*ashrdi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, operands[3], DImode); DONE;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
+
+(define_insn "x86_shrd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+ (ior:SI (ashiftrt:SI (match_dup 0)
+ (match_operand:QI 2 "nonmemory_operand" "Ic"))
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "prefix_0f" "1")
+ (set_attr "pent_pair" "np")
+ (set_attr "mode" "SI")])
+
+(define_expand "x86_shift_adj_3"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:SI 1 "register_operand" ""))
+ (use (match_operand:QI 2 "register_operand" ""))]
+ ""
+{
+ rtx label = gen_label_rtx ();
+ rtx tmp;
+
+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+ tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+ gen_rtx_LABEL_REF (VOIDmode, label),
+ pc_rtx);
+ tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ JUMP_LABEL (tmp) = label;
+
+ emit_move_insn (operands[0], operands[1]);
+ emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31)));
+
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+
+ DONE;
+})
+
+(define_expand "ashrsi3_31"
+ [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:SI 2 "const_int_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_insn "*ashrsi3_31"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+ (match_operand:SI 2 "const_int_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))]
+ "INTVAL (operands[2]) == 31
+ && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ {cltd|cdq}
+ sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_31_zext"
+ [(set (match_operand:DI 0 "register_operand" "=*d,r")
+ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+ (match_operand:SI 2 "const_int_operand" "i,i"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+ && INTVAL (operands[2]) == 31
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ {cltd|cdq}
+ sar{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "imovx,ishift")
+ (set_attr "prefix_0f" "0,*")
+ (set_attr "length_immediate" "0,*")
+ (set_attr "modrm" "0,1")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashrsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;")
+
+(define_insn "*ashrsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*ashrsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ sar{l}\t{%2, %0|%0, %2}
+ sar{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "@
+ sar{l}\t{%2, %k0|%k0, %2}
+ sar{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrsi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*ashrsi3_one_bit_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrsi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:SI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*ashrsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+ "sar{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_expand "ashrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;")
+
+(define_insn "*ashrhi3_1_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "@
+ sar{w}\t{%2, %0|%0, %2}
+ sar{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrhi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:HI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrhi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrhi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (ashiftrt:HI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_insn "*ashrhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+ "sar{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_expand "ashrqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;")
+
+(define_insn "*ashrqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "@
+ sar{b}\t{%2, %0|%0, %2}
+ sar{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (ashiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ sar{b}\t{%1, %0|%0, %1}
+ sar{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "ishift1")
+ (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrqi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashiftrt:QI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*ashrqi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashrqi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (ashiftrt:QI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*ashrqi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+ "sar{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+
+;; Logical shift instructions
+
+;; See comment above `ashldi3' about how this works.
+
+(define_expand "lshrti3"
+ [(set (match_operand:TI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_64BIT"
+ "ix86_expand_binary_operator (LSHIFTRT, TImode, operands); DONE;")
+
+;; This pattern must be defined before *lshrti3_1 to prevent the
+;; combine pass from converting sse2_lshrti3 into *lshrti3_1.
+
+(define_insn "*avx_lshrti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_lshrti3"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+ return "psrldq\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*lshrti3_1"
+ [(set (match_operand:TI 0 "register_operand" "=r")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "Oc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+(define_peephole2
+ [(match_scratch:DI 3 "r")
+ (parallel [(set (match_operand:TI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "TARGET_64BIT"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, operands[3], TImode); DONE;")
+
+(define_split
+ [(set (match_operand:TI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;")
+
+(define_expand "lshrdi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
+
+(define_insn "*lshrdi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrdi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "J,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{q}\t{%2, %0|%0, %2}
+ shr{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrdi3_cmp_one_bit_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrdi3_cconly_one_bit_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+ "shr{q}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrdi3_cmp_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_63_operand" "J"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:DI (match_dup 1) (match_dup 2)))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+ "shr{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_cconly_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_63_operand" "J"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+ "shr{q}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "DI")])
+
+(define_insn "*lshrdi3_1"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "nonmemory_operand" "Jc")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "#"
+ [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_dup 3)]
+ "!TARGET_64BIT && TARGET_CMOVE"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, operands[3], DImode); DONE;")
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+ ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+ "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;")
+
+(define_insn "*lshrsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*lshrsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{l}\t{%2, %0|%0, %2}
+ shr{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{l}\t{%2, %k0|%k0, %2}
+ shr{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrsi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrsi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+(define_insn "*lshrsi3_cmp_one_bit_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t%k0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrsi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:SI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_insn "*lshrsi3_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+ "TARGET_64BIT
+ && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+ "shr{l}\t{%2, %k0|%k0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "SI")])
+
+(define_expand "lshrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;")
+
+(define_insn "*lshrhi3_1_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "@
+ shr{w}\t{%2, %0|%0, %2}
+ shr{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:HI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrhi3_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrhi3_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (lshiftrt:HI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_insn "*lshrhi3_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:HI 0 "=r"))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+ "shr{w}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "HI")])
+
+(define_expand "lshrqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;")
+
+(define_insn "*lshrqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "@
+ shr{b}\t{%2, %0|%0, %2}
+ shr{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (lshiftrt:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ shr{b}\t{%1, %0|%0, %1}
+ shr{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "ishift1")
+ (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrqi2_one_bit_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (lshiftrt:QI (match_dup 1) (match_dup 2)))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*lshrqi2_one_bit_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t%0"
+ [(set_attr "type" "ishift")
+ (set_attr "length" "2")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*lshrqi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (lshiftrt:QI (match_dup 1) (match_dup 2)))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrqi2_cconly"
+ [(set (reg FLAGS_REG)
+ (compare
+ (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I"))
+ (const_int 0)))
+ (clobber (match_scratch:QI 0 "=q"))]
+ "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+ "shr{b}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ishift")
+ (set_attr "mode" "QI")])
+
+;; Rotate instructions
+
+(define_expand "rotldi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ ix86_expand_binary_operator (ROTATE, DImode, operands);
+ DONE;
+ }
+ if (!const_1_to_31_operand (operands[2], VOIDmode))
+ FAIL;
+ emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
+(define_insn_and_split "ix86_rotldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (rotate:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (parallel
+ [(set (match_dup 4)
+ (ior:SI (ashift:SI (match_dup 4) (match_dup 2))
+ (lshiftrt:SI (match_dup 5)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 5)
+ (ior:SI (ashift:SI (match_dup 5) (match_dup 2))
+ (lshiftrt:SI (match_dup 3)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);")
+
+(define_insn "*rotlsi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATE, DImode, operands)"
+ "rol{q}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotldi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "e,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)"
+ "@
+ rol{q}\t{%2, %0|%0, %2}
+ rol{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "DI")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;")
+
+(define_insn "*rotlsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+ "rol{l}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (rotate:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+ "rol{l}\t%k0"
+ [(set_attr "type" "rotate")
+ (set_attr "length" "2")])
+
+(define_insn "*rotlsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, SImode, operands)"
+ "@
+ rol{l}\t{%2, %0|%0, %2}
+ rol{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rotlsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (rotate:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)"
+ "@
+ rol{l}\t{%2, %k0|%k0, %2}
+ rol{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;")
+
+(define_insn "*rotlhi3_1_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATE, HImode, operands)"
+ "rol{w}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, HImode, operands)"
+ "@
+ rol{w}\t{%2, %0|%0, %2}
+ rol{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "HI")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (rotate:HI (match_dup 0) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "rotlqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
+
+(define_insn "*rotlqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
+ "rol{b}\t%0"
+ [(set_attr "type" "rotate1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATE, QImode, operands)"
+ "rol{b}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotlqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (rotate:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ rol{b}\t{%1, %0|%0, %1}
+ rol{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "rotate1")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rotlqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATE, QImode, operands)"
+ "@
+ rol{b}\t{%2, %0|%0, %2}
+ rol{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "QI")])
+
+(define_expand "rotrdi3"
+ [(set (match_operand:DI 0 "shiftdi_operand" "")
+ (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+{
+ if (TARGET_64BIT)
+ {
+ ix86_expand_binary_operator (ROTATERT, DImode, operands);
+ DONE;
+ }
+ if (!const_1_to_31_operand (operands[2], VOIDmode))
+ FAIL;
+ emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+;; Implement rotation using two double-precision shift instructions
+;; and a scratch register.
+(define_insn_and_split "ix86_rotrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (rotatert:DI (match_operand:DI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_1_to_31_operand" "I")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+ (parallel
+ [(set (match_dup 4)
+ (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
+ (ashift:SI (match_dup 5)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 5)
+ (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
+ (ashift:SI (match_dup 3)
+ (minus:QI (const_int 32) (match_dup 2)))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);")
+
+(define_insn "*rotrdi3_1_one_bit_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
+ "ror{q}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:DI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrdi3_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
+ (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "J,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
+ "@
+ ror{q}\t{%2, %0|%0, %2}
+ ror{q}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "DI")])
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ ""
+ "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
+
+(define_insn "*rotrsi3_1_one_bit"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+ "ror{l}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrsi3_1_one_bit_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:QI 2 "const1_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+ "ror{l}\t%k0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand:SI 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrsi3_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
+ (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+ "@
+ ror{l}\t{%2, %0|%0, %2}
+ ror{l}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rotrsi3_1_zext"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI
+ (rotatert:SI (match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
+ "@
+ ror{l}\t{%2, %k0|%k0, %2}
+ ror{l}\t{%b2, %k0|%k0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "SI")])
+
+(define_expand "rotrhi3"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;")
+
+(define_insn "*rotrhi3_one_bit"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
+ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATERT, HImode, operands)"
+ "ror{w}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
+ (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, HImode, operands)"
+ "@
+ ror{w}\t{%2, %0|%0, %2}
+ ror{w}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "HI")])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (rotatert:HI (match_dup 0) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))]
+ "reload_completed"
+ [(parallel [(set (strict_low_part (match_dup 0))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_expand "rotrqi3"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;")
+
+(define_insn "*rotrqi3_1_one_bit"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+ (match_operand:QI 2 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && ix86_binary_operator_ok (ROTATERT, QImode, operands)"
+ "ror{b}\t%0"
+ [(set_attr "type" "rotate")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrqi3_1_one_bit_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "const1_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
+ "ror{b}\t%0"
+ [(set_attr "type" "rotate1")
+ (set (attr "length")
+ (if_then_else (match_operand 0 "register_operand" "")
+ (const_string "2")
+ (const_string "*")))])
+
+(define_insn "*rotrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
+ (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
+ (match_operand:QI 2 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ROTATERT, QImode, operands)"
+ "@
+ ror{b}\t{%2, %0|%0, %2}
+ ror{b}\t{%b2, %0|%0, %b2}"
+ [(set_attr "type" "rotate")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rotrqi3_1_slp"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
+ (rotatert:QI (match_dup 0)
+ (match_operand:QI 1 "nonmemory_operand" "I,c")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ ror{b}\t{%1, %0|%0, %1}
+ ror{b}\t{%b1, %0|%0, %b1}"
+ [(set_attr "type" "rotate1")
+ (set_attr "mode" "QI")])
+
+;; Bit set / bit test instructions
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const8_operand" "")
+ (match_operand:SI 3 "const8_operand" "")))]
+ ""
+{
+ /* Handle extractions from %ah et al. */
+ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[1], VOIDmode))
+ FAIL;
+})
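+
+;; For illustration: the only accepted extv form is an 8-bit field at
+;; bit position 8, i.e. the %ah/%bh/%ch/%dh byte, so code like
+;;   (signed char) (x >> 8)
+;; can be emitted as a single "movsbl %ah, %eax" when x is in %eax.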
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand 1 "ext_register_operand" "")
+ (match_operand:SI 2 "const8_operand" "")
+ (match_operand:SI 3 "const8_operand" "")))]
+ ""
+{
+ /* Handle extractions from %ah et al. */
+ if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+ FAIL;
+
+ /* From mips.md: extract_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[1], VOIDmode))
+ FAIL;
+})
+
+(define_expand "insv"
+ [(set (zero_extract (match_operand 0 "ext_register_operand" "")
+ (match_operand 1 "const8_operand" "")
+ (match_operand 2 "const8_operand" ""))
+ (match_operand 3 "register_operand" ""))]
+ ""
+{
+ /* Handle insertions to %ah et al. */
+ if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
+ FAIL;
+
+ /* From mips.md: insert_bit_field doesn't verify that our source
+ matches the predicate, so check it again here. */
+ if (! ext_register_operand (operands[0], VOIDmode))
+ FAIL;
+
+ if (TARGET_64BIT)
+ emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3]));
+ else
+ emit_insn (gen_movsi_insv_1 (operands[0], operands[3]));
+
+ DONE;
+})
+
+;; %%% bts, btr, btc, bt.
+;; In general these instructions are *slow* when applied to memory,
+;; since they enforce atomic operation. When applied to registers,
+;; it depends on the cpu implementation. They're never faster than
+;; the corresponding and/ior/xor operations, so with 32-bit there's
+;; no point. But in 64-bit, we can't hold the relevant immediates
+;; within the instruction itself, so operating on bits in the high
+;; 32 bits of a register becomes easier.
+;;
+;; These are slow on Nocona, but fast on Athlon64. We do require the use
+;; of btrq and btcq for corner cases of post-reload expansion of absdf and
+;; negdf respectively, so they can never be disabled entirely.
+
+(define_insn "*btsq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 1))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "bts{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")])
+
+(define_insn "*btrq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "btr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")])
+
+(define_insn "*btcq"
+ [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+ "btc{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")])
+
+;; Allow Nocona to avoid these instructions if a register is available.
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (lo, hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_iordi3 (operands[0], operands[0], op1));
+ DONE;
+})
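+
+;; Note on the "i >= 31" test above (shared by the two peepholes that
+;; follow): or/xor/and on x86-64 only take a sign-extended 32-bit
+;; immediate, which can represent 1<<i (and ~(1<<i)) exactly for
+;; i <= 30; from bit 31 upwards the sign extension corrupts the upper
+;; half, so the mask must be loaded into the scratch register first.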
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (~lo, ~hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_anddi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+(define_peephole2
+ [(match_scratch:DI 2 "r")
+ (parallel [(set (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:DI 1 "const_0_to_63_operand" ""))
+ (not:DI (zero_extract:DI
+ (match_dup 0) (const_int 1) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && !TARGET_USE_BT"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+ rtx op1;
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else if (i < HOST_BITS_PER_WIDE_INT)
+ lo = (HOST_WIDE_INT)1 << i, hi = 0;
+ else
+ lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+ op1 = immed_double_const (lo, hi, DImode);
+ if (i >= 31)
+ {
+ emit_move_insn (operands[2], op1);
+ op1 = operands[2];
+ }
+
+ emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+ DONE;
+})
+
+(define_insn "*btdi_rex64"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:DI
+ (match_operand:DI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:DI 1 "nonmemory_operand" "rN"))
+ (const_int 0)))]
+ "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+ "bt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")])
+
+(define_insn "*btsi"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand:SI 1 "nonmemory_operand" "rN"))
+ (const_int 0)))]
+ "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+ "bt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "alu1")])
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG. Generate an equality comparison if `seq' or `sne'.
+
+;; %%% Do the expansion to SImode. On PII, do things the xor+setcc way
+;; to avoid partial register stalls. Otherwise do things the setcc+movzx
+;; way, which can later delete the movzx if only QImode is needed.
+
+(define_expand "s<code>"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (int_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ ""
+ "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
+
+(define_expand "s<code>"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (fp_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+ "TARGET_80387 || TARGET_SSE"
+ "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
+
+(define_insn "*setcc_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ ""
+ "set%C1\t%0"
+ [(set_attr "type" "setcc")
+ (set_attr "mode" "QI")])
+
+(define_insn "*setcc_2"
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+ (match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))]
+ ""
+ "set%C1\t%0"
+ [(set_attr "type" "setcc")
+ (set_attr "mode" "QI")])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one. Under certain
+;; conditions this is safe on x86, so help combine avoid creating
+;;
+;; seta %al
+;; testb %al, %al
+;; sete %al
+
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (ne:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (ne:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (eq:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx new_op1 = copy_rtx (operands[1]);
+ operands[1] = new_op1;
+ PUT_MODE (new_op1, QImode);
+ PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+ GET_MODE (XEXP (new_op1, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op1, VOIDmode))
+ FAIL;
+})
+
+(define_split
+ [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+ (eq:QI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0)))]
+ ""
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx new_op1 = copy_rtx (operands[1]);
+ operands[1] = new_op1;
+ PUT_MODE (new_op1, QImode);
+ PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+ GET_MODE (XEXP (new_op1, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op1, VOIDmode))
+ FAIL;
+})
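+
+;; For illustration: the eq splits above reverse the condition instead
+;; of testing the setcc result, so
+;;   seta %al ; testb %al, %al ; sete %al
+;; collapses into the single "setbe %al".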
+
+;; The SSE store-flag instructions save 0 or 0xffffffff to the result.
+;; Subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is a NaN, but not in normalized form, so we can't represent
+;; it directly.
+
+(define_insn "*avx_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "avx_comparison_float_operator"
+ [(match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_AVX"
+ "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "sse_comparison_operator"
+ [(match_operand:MODEF 2 "register_operand" "0")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+ "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse5_setcc<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 1 "sse5_comparison_float_operator"
+ [(match_operand:MODEF 2 "register_operand" "x")
+ (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE5"
+ "com%Y1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "<MODE>")])
+
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+;; For all bCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG. Generate an equality comparison if `beq' or `bne'.
+
+(define_expand "b<code>"
+ [(set (pc)
+ (if_then_else (int_cond:CC (reg:CC FLAGS_REG)
+ (const_int 0))
+ (label_ref (match_operand 0 ""))
+ (pc)))]
+ ""
+ "ix86_expand_branch (<CODE>, operands[0]); DONE;")
+
+(define_expand "b<code>"
+ [(set (pc)
+ (if_then_else (fp_cond:CC (reg:CC FLAGS_REG)
+ (const_int 0))
+ (label_ref (match_operand 0 ""))
+ (pc)))]
+ "TARGET_80387 || TARGET_SSE_MATH"
+ "ix86_expand_branch (<CODE>, operands[0]); DONE;")
+
+(define_insn "*jcc_1"
+ [(set (pc)
+ (if_then_else (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "%+j%C1\t%l0"
+ [(set_attr "type" "ibr")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
+
+(define_insn "*jcc_2"
+ [(set (pc)
+ (if_then_else (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (pc)
+ (label_ref (match_operand 0 "" ""))))]
+ ""
+ "%+j%c1\t%l0"
+ [(set_attr "type" "ibr")
+ (set_attr "modrm" "0")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 6)))])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one. Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;; seta %al
+;; testb %al, %al
+;; je Lfoo
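+;;
+;; and instead emit a single jump on the original flags; for the seta
+;; example above that is (a sketch; the eq split below reverses the
+;; condition first):
+;;
+;;	jbe	Lfoo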
+
+(define_split
+ [(set (pc)
+ (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ PUT_MODE (operands[0], VOIDmode);
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ [(set (pc)
+ (if_then_else (match_dup 0)
+ (label_ref (match_dup 1))
+ (pc)))]
+{
+ rtx new_op0 = copy_rtx (operands[0]);
+ operands[0] = new_op0;
+ PUT_MODE (new_op0, VOIDmode);
+ PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
+ GET_MODE (XEXP (new_op0, 0))));
+
+ /* Make sure that (a) the CCmode we have for the flags is strong
+ enough for the reversed compare or (b) we have a valid FP compare. */
+ if (! ix86_comparison_operator (new_op0, VOIDmode))
+ FAIL;
+})
+
+;; zero_extend in SImode is correct, since this is what the combine pass
+;; generates from a shift insn with a QImode operand. Actually, the mode
+;; of operand 2 (the bit offset operand) doesn't matter, since the bt insn
+;; takes the appropriate modulo of the bit offset value.
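+;;
+;; For instance, the eventual code is just a bit test and a jump on the
+;; carry flag, with bt itself reducing the offset (a sketch in AT&T
+;; syntax with arbitrary registers):
+;;
+;;	bt	%ecx, %eax	; CF = bit (%ecx mod 32) of %eax
+;;	jc	Ltaken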
+
+(define_insn_and_split "*jcc_btdi_rex64"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(zero_extract:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (zero_extend:SI
+ (match_operand:QI 2 "register_operand" "r")))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:DI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btdi_mask_rex64"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(zero_extract:DI
+ (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (and:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n")))])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))
+ && (INTVAL (operands[3]) & 0x3f) == 0x3f"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:DI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 4))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (DImode, operands[2], SImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+(define_insn_and_split "*jcc_btsi"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(zero_extract:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)
+ (zero_extend:SI
+ (match_operand:QI 2 "register_operand" "r")))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btsi_mask"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(zero_extract:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (const_int 1)
+ (and:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n")))])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+ && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 4))
+ (pc)))]
+ "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
+(define_insn_and_split "*jcc_btsi_1"
+ [(set (pc)
+ (if_then_else (match_operator 0 "bt_comparison_operator"
+ [(and:SI
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r"))
+ (const_int 1))
+ (const_int 0)])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 3))
+ (pc)))]
+{
+ operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btsi_mask_1"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(and:SI
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (subreg:QI
+ (and:SI
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "const_int_operand" "n")) 0))
+ (const_int 1))
+ (const_int 0)])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+ && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+ "#"
+ "&& 1"
+ [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extract:SI
+ (match_dup 1)
+ (const_int 1)
+ (match_dup 2))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+ (label_ref (match_dup 4))
+ (pc)))]
+ "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
+;; Define combination compare-and-branch fp compare instructions to use
+;; during early optimization. Splitting the operation apart early makes
+;; for bad code when we want to reverse the operation.
+
+(define_insn "*fp_jcc_1_mixed"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f,x")
+ (match_operand 2 "nonimmediate_operand" "f,xm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_1_sse"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "x")
+ (match_operand 2 "nonimmediate_operand" "xm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_1_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_CMOVE
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_2_mixed"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f,x")
+ (match_operand 2 "nonimmediate_operand" "f,xm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_MIX_SSE_I387
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_2_sse"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "x")
+ (match_operand 2 "nonimmediate_operand" "xm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "TARGET_SSE_MATH
+ && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_2_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && TARGET_CMOVE
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_3_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "nonimmediate_operand" "fm")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_4_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "nonimmediate_operand" "fm")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "TARGET_80387
+ && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode)
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_5_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_6_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "register_operand" "f")])
+ (pc)
+ (label_ref (match_operand 3 "" ""))))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+(define_insn "*fp_jcc_7_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "f")
+ (match_operand 2 "const0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 4 "=a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])
+ && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
+ && SELECT_CC_MODE (GET_CODE (operands[0]),
+ operands[1], operands[2]) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
+ "#")
+
+;; The order of operands in *fp_jcc_8_387 is forced by combine in its
+;; simplify_comparison () function. A float operator is treated as
+;; RTX_OBJ, which takes precedence over other operators, so it is always
+;; placed first. Swap the condition and operands to match the ficom
+;; instruction.
+
+(define_insn "*fp_jcc_8<mode>_387"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
+ (match_operand 3 "register_operand" "f,f")])
+ (label_ref (match_operand 4 "" ""))
+ (pc)))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 5 "=a,a"))]
+ "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
+ && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
+ && GET_MODE (operands[1]) == GET_MODE (operands[3])
+ && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0])))
+ && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
+ && ix86_fp_jump_nontrivial_p (swap_condition (GET_CODE (operands[0])))"
+ "#")
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "nonimmediate_operand" "")])
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3], operands[4], NULL_RTX, NULL_RTX);
+ DONE;
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "general_operand" "")])
+ (match_operand 3 "" "")
+ (match_operand 4 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 5 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
+ operands[3], operands[4], operands[5], NULL_RTX);
+ DONE;
+})
+
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "memory_operand" "")])
+ (match_operand 3 "register_operand" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 6 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]);
+ ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+ operands[3], operands[7],
+ operands[4], operands[5], operands[6], NULL_RTX);
+ DONE;
+})
+
+;; %%% Kill this when reload knows how to do it.
+(define_split
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operator 1 "float_operator"
+ [(match_operand:X87MODEI12 2 "register_operand" "")])
+ (match_operand 3 "register_operand" "")])
+ (match_operand 4 "" "")
+ (match_operand 5 "" "")))
+ (clobber (reg:CCFP FPSR_REG))
+ (clobber (reg:CCFP FLAGS_REG))
+ (clobber (match_scratch:HI 6 "=a"))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+ operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]);
+ ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
+ operands[3], operands[7],
+ operands[4], operands[5], operands[6], operands[2]);
+ DONE;
+})
+
+;; Unconditional and other jump instructions
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "jmp\t%l0"
+ [(set_attr "type" "ibr")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 0) (pc))
+ (const_int -126))
+ (lt (minus (match_dup 0) (pc))
+ (const_int 128)))
+ (const_int 2)
+ (const_int 5)))
+ (set_attr "modrm" "0")])
+
+(define_expand "indirect_jump"
+ [(set (pc) (match_operand 0 "nonimmediate_operand" ""))]
+ ""
+ "")
+
+(define_insn "*indirect_jump"
+ [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))]
+ ""
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_expand "tablejump"
+ [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" ""))
+ (use (label_ref (match_operand 1 "" "")))])]
+ ""
+{
+ /* In PIC mode, the table entries are stored GOT-relative (32-bit) or
+ PC-relative (64-bit). Convert the relative address to an absolute
+ address. */
+ if (flag_pic)
+ {
+ rtx op0, op1;
+ enum rtx_code code;
+
+ /* We can't use @GOTOFF for text labels on VxWorks;
+ see gotoff_operand. */
+ if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = gen_rtx_LABEL_REF (Pmode, operands[1]);
+ }
+ else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA)
+ {
+ code = PLUS;
+ op0 = operands[0];
+ op1 = pic_offset_table_rtx;
+ }
+ else
+ {
+ code = MINUS;
+ op0 = pic_offset_table_rtx;
+ op1 = operands[0];
+ }
+
+ operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ }
+})
+
+(define_insn "*tablejump_1"
+ [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
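+;;
+;; That is, roughly (a sketch; the clear has to be placed before the
+;; flag-setting instruction because xor clobbers the flags):
+;;
+;;	cmpl	%esi, %edi		xorl	%eax, %eax
+;;	sete	%al		-->	cmpl	%esi, %edi
+;;	movzbl	%al, %eax		sete	%al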
+
+(define_peephole2
+ [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+ (set (match_operand:QI 1 "register_operand" "")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (set (match_operand 3 "q_regs_operand" "")
+ (zero_extend (match_dup 1)))]
+ "(peep2_reg_dead_p (3, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 2))]
+{
+ operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+ operands[5] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
+
+;; Similar, but match zero_extendhisi2_and, which adds a clobber.
+
+(define_peephole2
+ [(set (reg FLAGS_REG) (match_operand 0 "" ""))
+ (set (match_operand:QI 1 "register_operand" "")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (parallel [(set (match_operand 3 "q_regs_operand" "")
+ (zero_extend (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "(peep2_reg_dead_p (3, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 2))]
+{
+ operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
+ operands[5] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
+
+;; Call instructions.
+
+;; The predicates normally associated with named expanders are not properly
+;; checked for calls. This is a bug in the generic code, but it isn't that
+;; easy to fix. Ignore it for now and be prepared to fix things up.
+
+;; Call subroutine returning no value.
+
+(define_expand "call_pop"
+ [(parallel [(call (match_operand:QI 0 "" "")
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "" "")))])]
+ "!TARGET_64BIT"
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0);
+ DONE;
+})
+
+(define_insn "*call_pop_0"
+ [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" ""))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_pop_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
+ (match_operand:SI 1 "" ""))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "!TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ {
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+ }
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%A0";
+ else
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_expand "call"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall"
+ [(call (match_operand:QI 0 "" "")
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))]
+ ""
+{
+ ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1);
+ DONE;
+})
+
+(define_insn "*call_0"
+ [(call (mem:QI (match_operand 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))]
+ ""
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P0";
+ else
+ return "call\t%P0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))]
+ "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1"
+ [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,c,d,a"))
+ (match_operand 1 "" ""))]
+ "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "jmp\t%P0";
+ return "jmp\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64_ms_sysv"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+ (match_operand 1 "" ""))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[0], Pmode))
+ return "call\t%P0";
+ return "call\t%A0";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*call_1_rex64_large"
+ [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+ (match_operand 1 "" ""))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "call\t%A0"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64"
+ [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" ""))
+ (match_operand 1 "" ""))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t%P0"
+ [(set_attr "type" "call")])
+
+(define_insn "*sibcall_1_rex64_v"
+ [(call (mem:QI (reg:DI R11_REG))
+ (match_operand 0 "" ""))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t{*%%}r11"
+ [(set_attr "type" "call")])
+
+
+;; Call subroutine, returning value in operand 0
+
+(define_expand "call_value_pop"
+ [(parallel [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 4 "" "")))])]
+ "!TARGET_64BIT"
+{
+ ix86_expand_call (operands[0], operands[1], operands[2],
+ operands[3], operands[4], 0);
+ DONE;
+})
+
+(define_expand "call_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 2 not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0);
+ DONE;
+})
+
+(define_expand "sibcall_value"
+ [(set (match_operand 0 "" "")
+ (call (match_operand:QI 1 "" "")
+ (match_operand:SI 2 "" "")))
+ (use (match_operand:SI 3 "" ""))]
+ ;; Operand 2 not used on the i386.
+ ""
+{
+ ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1);
+ DONE;
+})
+
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ ""
+{
+ int i;
+
+ /* In order to give reg-stack an easier job in validating two
+ coprocessor registers as containing a possible return value,
+ simply pretend the untyped call returns a complex long double
+ value.
+
+ We can't use SSE_REGPARM_MAX here since the callee is unprototyped
+ and should have the default ABI. */
+
+ ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
+ ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
+ operands[0], const0_rtx,
+ GEN_INT ((TARGET_64BIT
+ ? (DEFAULT_ABI == SYSV_ABI
+ ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX)
+ : X86_32_SSE_REGPARM_MAX)
+ - 1),
+ NULL, 0);
+
+ for (i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; Prologue and epilogue instructions
+
+;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
+;; all of memory. This blocks insns from being moved across this point.
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Do not schedule instructions accessing memory across this point.
+
+(define_expand "memory_blockage"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*memory_blockage"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; As USE insns aren't meaningful after reload, this is used instead
+;; to prevent deleting instructions that set registers for PIC code.
+(define_insn "prologue_use"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Insn emitted into the body of a function to return from a function.
+;; This is only done if the function's epilogue is known to be simple.
+;; See comments for ix86_can_use_return_insn_p in i386.c.
+
+(define_expand "return"
+ [(return)]
+ "ix86_can_use_return_insn_p ()"
+{
+ if (crtl->args.pops_args)
+ {
+ rtx popc = GEN_INT (crtl->args.pops_args);
+ emit_jump_insn (gen_return_pop_internal (popc));
+ DONE;
+ }
+})
+
+(define_insn "return_internal"
+ [(return)]
+ "reload_completed"
+ "ret"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Used by x86_machine_dependent_reorg to avoid the penalty that Athlon
+;; and K8 incur on a single-byte RET instruction.
+
+(define_insn "return_internal_long"
+ [(return)
+ (unspec [(const_int 0)] UNSPEC_REP)]
+ "reload_completed"
+ "rep\;ret"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "prefix_rep" "1")
+ (set_attr "modrm" "0")])
+
+(define_insn "return_pop_internal"
+ [(return)
+ (use (match_operand:SI 0 "const_int_operand" ""))]
+ "reload_completed"
+ "ret\t%0"
+ [(set_attr "length" "3")
+ (set_attr "length_immediate" "2")
+ (set_attr "modrm" "0")])
+
+(define_insn "return_indirect_internal"
+ [(return)
+ (use (match_operand:SI 0 "register_operand" "r"))]
+ "reload_completed"
+ "jmp\t%A0"
+ [(set_attr "type" "ibr")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
+;; Align to 16-byte boundary, max skip in op0. Used to avoid
+;; branch prediction penalty for the third jump in a 16-byte
+;; block on K8.
+
+(define_insn "align"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
+ ""
+{
+#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0]));
+#else
+ /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do
+ that. The align insn is used to avoid 3 jump instructions in a row,
+ which improves branch prediction, and that benefit hardly outweighs the
+ cost of the extra 8 nops on average that the full alignment pseudo
+ operation would insert. */
+#endif
+ return "";
+}
+ [(set_attr "length" "16")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "ix86_expand_prologue (); DONE;")
+
+(define_insn "set_got"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ { return output_set_got (operands[0], NULL_RTX); }
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "set_got_labelled"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(label_ref (match_operand 1 "" ""))]
+ UNSPEC_SET_GOT))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ { return output_set_got (operands[0], operands[1]); }
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "set_got_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
+ "TARGET_64BIT"
+ "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "length" "6")])
+
+(define_insn "set_rip_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(label_ref (match_operand 1 "" ""))] UNSPEC_SET_RIP))]
+ "TARGET_64BIT"
+ "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "length" "6")])
+
+(define_insn "set_got_offset_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(label_ref (match_operand 1 "" ""))]
+ UNSPEC_SET_GOT_OFFSET))]
+ "TARGET_64BIT"
+ "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
+ [(set_attr "type" "imov")
+ (set_attr "length" "11")])
+
+(define_expand "epilogue"
+ [(const_int 0)]
+ ""
+ "ix86_expand_epilogue (1); DONE;")
+
+(define_expand "sibcall_epilogue"
+ [(const_int 0)]
+ ""
+ "ix86_expand_epilogue (0); DONE;")
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
+
+ /* Tricky bit: we write the address of the handler to which we will
+ be returning into someone else's stack frame, one word below the
+ stack address we wish to restore. */
+ tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa);
+ tmp = plus_constant (tmp, -UNITS_PER_WORD);
+ tmp = gen_rtx_MEM (Pmode, tmp);
+ emit_move_insn (tmp, ra);
+
+ emit_jump_insn (gen_eh_return_internal ());
+ emit_barrier ();
+ DONE;
+})
+
+(define_insn_and_split "eh_return_internal"
+ [(eh_return)]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+ "ix86_expand_epilogue (2); DONE;")
+
+(define_insn "leave"
+ [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4)))
+ (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG)))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+ "leave"
+ [(set_attr "type" "leave")])
+
+(define_insn "leave_rex64"
+ [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
+ (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+ "leave"
+ [(set_attr "type" "leave")])
+
+(define_expand "ffssi2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (match_scratch:SI 2 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (TARGET_CMOVE)
+ {
+ emit_insn (gen_ffs_cmove (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_expand "ffs_cmove"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "")
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (ctz:SI (match_dup 1)))])
+ (set (match_dup 0) (if_then_else:SI
+ (eq (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_dup 2)
+ (match_dup 0)))
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_CMOVE"
+ "operands[2] = gen_reg_rtx (SImode);")
+
+(define_insn_and_split "*ffs_no_cmove"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (match_scratch:SI 2 "=&q"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_CMOVE"
+ "#"
+ "&& reload_completed"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (ctz:SI (match_dup 1)))])
+ (set (strict_low_part (match_dup 3))
+ (eq:QI (reg:CCZ FLAGS_REG) (const_int 0)))
+ (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[3] = gen_lowpart (QImode, operands[2]);
+ ix86_expand_clear (operands[2]);
+})
+
+(define_insn "*ffssi_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (ctz:SI (match_dup 1)))]
+ ""
+ "bsf{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_expand "ffsdi2"
+ [(set (match_dup 2) (const_int -1))
+ (parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "")
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (ctz:DI (match_dup 1)))])
+ (set (match_dup 0) (if_then_else:DI
+ (eq (reg:CCZ FLAGS_REG) (const_int 0))
+ (match_dup 2)
+ (match_dup 0)))
+ (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+ "operands[2] = gen_reg_rtx (DImode);")
+
+(define_insn "*ffsdi_1"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (ctz:DI (match_dup 1)))]
+ "TARGET_64BIT"
+ "bsf{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_insn "ctzsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsf{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_insn "ctzdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "bsf{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")])
+
+(define_expand "clzsi2"
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
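+
+;; Without ABM, clzsi2 therefore becomes (a sketch; like bsf, bsr leaves
+;; the result undefined for a zero input):
+;;
+;;	bsrl	%eax, %eax	; 31 - clz
+;;	xorl	$31, %eax	; (31 - clz) ^ 31 == clz for values 0..31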
+
+(define_insn "clzsi2_abm"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*bsr"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (minus:SI (const_int 31)
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:SWI248 0 "register_operand" "=r")
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %0|%0, %1}";
+#else
+ return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*popcount<mode>2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SWI248
+ (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:SWI248 0 "register_operand" "=r")
+ (popcount:SWI248 (match_dup 1)))]
+ "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %0|%0, %1}";
+#else
+ return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*popcountsi2_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (popcount:SI (match_dup 1))))]
+ "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+{
+#if TARGET_MACHO
+ return "popcnt\t{%1, %0|%0, %1}";
+#else
+ return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_expand "bswapsi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (bswap:SI (match_operand:SI 1 "register_operand" "")))]
+ ""
+{
+ if (!TARGET_BSWAP)
+ {
+ rtx x = operands[0];
+
+ emit_move_insn (x, operands[1]);
+ emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+ emit_insn (gen_rotlsi3 (x, x, GEN_INT (16)));
+ emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x)));
+ DONE;
+ }
+})
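+
+;; Without bswap, the expander above falls back to rotates; schematically
+;; (a sketch):
+;;
+;;	rolw	$8, %ax		; swap the two low bytes
+;;	roll	$16, %eax	; swap the halves
+;;	rolw	$8, %ax		; swap the remaining byte pair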
+
+(define_insn "*bswapsi_1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (bswap:SI (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_BSWAP"
+ "bswap\t%0"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "length" "2")])
+
+(define_insn "*bswaphi_lowpart_1"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)"
+ "@
+ xchg{b}\t{%h0, %b0|%b0, %h0}
+ rol{w}\t{$8, %0|%0, 8}"
+ [(set_attr "length" "2,4")
+ (set_attr "mode" "QI,HI")])
+
+(define_insn "bswaphi_lowpart"
+ [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
+ (bswap:HI (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "rol{w}\t{$8, %0|%0, 8}"
+ [(set_attr "length" "4")
+ (set_attr "mode" "HI")])
+
+(define_insn "bswapdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (bswap:DI (match_operand:DI 1 "register_operand" "0")))]
+ "TARGET_64BIT"
+ "bswap\t%0"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "length" "3")])
+
+(define_expand "clzdi2"
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (minus:DI (const_int 63)
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT"
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzdi2_abm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_ABM"
+ "lzcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
+
+(define_insn "*bsr_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (minus:DI (const_int 63)
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "bsr{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+(define_expand "clzhi2"
+ [(parallel
+ [(set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzhi2_abm"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
+
+(define_insn "*bsrhi"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "HI")])
+
+(define_expand "paritydi2"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (parity:DI (match_operand:DI 1 "register_operand" "")))]
+ "! TARGET_POPCNT"
+{
+ rtx scratch = gen_reg_rtx (QImode);
+ rtx cond;
+
+ emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX,
+ NULL_RTX, operands[1]));
+
+ cond = gen_rtx_fmt_ee (ORDERED, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+ if (TARGET_64BIT)
+ emit_insn (gen_zero_extendqidi2 (operands[0], scratch));
+ else
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendqisi2 (tmp, scratch));
+ emit_insn (gen_zero_extendsidi2 (operands[0], tmp));
+ }
+ DONE;
+})
+
+(define_insn_and_split "paritydi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (parity:CC (match_operand:DI 3 "register_operand" "0")))
+ (clobber (match_scratch:DI 0 "=r"))
+ (clobber (match_scratch:SI 1 "=&r"))
+ (clobber (match_scratch:HI 2 "=Q"))]
+ "! TARGET_POPCNT"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 1)
+ (xor:SI (match_dup 1) (match_dup 4)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (reg:CC FLAGS_REG)
+ (parity:CC (match_dup 1)))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])]
+{
+ operands[4] = gen_lowpart (SImode, operands[3]);
+
+ if (TARGET_64BIT)
+ {
+ emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
+ emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
+ }
+ else
+ operands[1] = gen_highpart (SImode, operands[3]);
+})
+
+(define_expand "paritysi2"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (parity:SI (match_operand:SI 1 "register_operand" "")))]
+ "! TARGET_POPCNT"
+{
+ rtx scratch = gen_reg_rtx (QImode);
+ rtx cond;
+
+ emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
+
+ cond = gen_rtx_fmt_ee (ORDERED, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+ emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
+ DONE;
+})
+
+(define_insn_and_split "paritysi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (parity:CC (match_operand:SI 2 "register_operand" "0")))
+ (clobber (match_scratch:SI 0 "=r"))
+ (clobber (match_scratch:HI 1 "=&Q"))]
+ "! TARGET_POPCNT"
+ "#"
+ "&& reload_completed"
+ [(parallel
+ [(set (match_dup 1)
+ (xor:HI (match_dup 1) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (reg:CC FLAGS_REG)
+ (parity:CC (match_dup 1)))
+ (clobber (match_dup 1))])]
+{
+ operands[3] = gen_lowpart (HImode, operands[2]);
+
+ emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
+ emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
+})
+
+(define_insn "*parityhi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (parity:CC (match_operand:HI 1 "register_operand" "0")))
+ (clobber (match_scratch:HI 0 "=Q"))]
+ "! TARGET_POPCNT"
+ "xor{b}\t{%h0, %b0|%b0, %h0}"
+ [(set_attr "length" "2")
+ (set_attr "mode" "HI")])
+
+(define_insn "*parityqi2_cmp"
+ [(set (reg:CC FLAGS_REG)
+ (parity:CC (match_operand:QI 0 "register_operand" "q")))]
+ "! TARGET_POPCNT"
+ "test{b}\t%0, %0"
+ [(set_attr "length" "2")
+ (set_attr "mode" "QI")])
+
+;; Thread-local storage patterns for ELF.
+;;
+;; Note that these code sequences must appear exactly as shown
+;; in order to allow linker relaxation.
+
+(define_insn "*tls_global_dynamic_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_insn "*tls_global_dynamic_32_sun"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "tls_symbolic_operand" "")
+ (match_operand:SI 3 "call_insn_operand" "")]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_SUN_TLS"
+ "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]}
+ push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop"
+ [(set_attr "type" "multi")
+ (set_attr "length" "14")])
+
+(define_expand "tls_global_dynamic_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI
+ [(match_dup 2)
+ (match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_dup 3)]
+ UNSPEC_TLS_GD))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (match_scratch:SI 5 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (flag_pic)
+ operands[2] = pic_offset_table_rtx;
+ else
+ {
+ operands[2] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[2]));
+ }
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32
+ (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ operands[3] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_global_dynamic_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
+ (match_operand:DI 3 "" "")))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)]
+ "TARGET_64BIT"
+ ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\;.word\t0x6666\;rex64\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "16")])
+
+(define_expand "tls_global_dynamic_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call:DI (mem:QI (match_dup 2)) (const_int 0)))
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLS_GD)])]
+ ""
+{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64
+ (operands[0], operands[1]));
+ DONE;
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_32_gnu"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU_TLS"
+ "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2"
+ [(set_attr "type" "multi")
+ (set_attr "length" "11")])
+
+(define_insn "*tls_local_dynamic_base_32_sun"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 "=d"))
+ (clobber (match_scratch:SI 4 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_SUN_TLS"
+ "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]}
+ push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3"
+ [(set_attr "type" "multi")
+ (set_attr "length" "13")])
+
+(define_expand "tls_local_dynamic_base_32"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 2)]
+ UNSPEC_TLS_LD_BASE))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (flag_pic)
+ operands[1] = pic_offset_table_rtx;
+ else
+ {
+ operands[1] = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (operands[1]));
+ }
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_32
+ (operands[0], ix86_tls_module_base (), operands[1]));
+ DONE;
+ }
+ operands[2] = ix86_tls_get_addr ();
+})
+
+(define_insn "*tls_local_dynamic_base_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+ (match_operand:DI 2 "" "")))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
+ "TARGET_64BIT"
+ "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1"
+ [(set_attr "type" "multi")
+ (set_attr "length" "12")])
+
+(define_expand "tls_local_dynamic_base_64"
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (call:DI (mem:QI (match_dup 1)) (const_int 0)))
+ (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
+ ""
+{
+ if (TARGET_GNU2_TLS)
+ {
+ emit_insn (gen_tls_dynamic_gnu2_64
+ (operands[0], ix86_tls_module_base ()));
+ DONE;
+ }
+ operands[1] = ix86_tls_get_addr ();
+})
+
+;; Local dynamic access to a single variable is a loss. Show combine
+;; how to convert it back to global dynamic.
+
+(define_insn_and_split "*tls_local_dynamic_32_once"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b")
+ (match_operand:SI 2 "call_insn_operand" "")]
+ UNSPEC_TLS_LD_BASE)
+ (const:SI (unspec:SI
+ [(match_operand:SI 3 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (match_scratch:SI 4 "=d"))
+ (clobber (match_scratch:SI 5 "=c"))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "#"
+ ""
+ [(parallel [(set (match_dup 0)
+ (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)]
+ UNSPEC_TLS_GD))
+ (clobber (match_dup 4))
+ (clobber (match_dup 5))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; Load and add the thread base pointer from %gs:0.
+
+(define_insn "*load_tp_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(const_int 0)] UNSPEC_TP))]
+ "!TARGET_64BIT"
+ "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP)
+ (match_operand:SI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*load_tp_di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(const_int 0)] UNSPEC_TP))]
+ "TARGET_64BIT"
+ "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
+ [(set_attr "type" "imov")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+(define_insn "*add_tp_di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP)
+ (match_operand:DI 1 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
+ [(set_attr "type" "alu")
+ (set_attr "modrm" "0")
+ (set_attr "length" "7")
+ (set_attr "memory" "load")
+ (set_attr "imm_disp" "false")])
+
+;; GNU2 TLS patterns can be split.
+
+(define_expand "tls_dynamic_gnu2_32"
+ [(set (match_dup 3)
+ (plus:SI (match_operand:SI 2 "register_operand" "")
+ (const:SI
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))))
+ (parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_dup 1) (match_dup 3)
+ (match_dup 2) (reg:SI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))])]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_32"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "b")
+ (const:SI
+ (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "SI")
+ (set_attr "length" "6")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_32"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")
+ (match_operand:SI 2 "register_operand" "0")
+ ;; we have to make sure %ebx still points to the GOT
+ (match_operand:SI 3 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32"
+ [(set (match_operand:SI 0 "register_operand" "=&a")
+ (plus:SI
+ (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "")
+ (match_operand:SI 4 "" "")
+ (match_operand:SI 2 "register_operand" "b")
+ (reg:SI SP_REG)]
+ UNSPEC_TLSDESC)
+ (const:SI (unspec:SI
+ [(match_operand:SI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 5))]
+{
+ operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
+})
+
+(define_expand "tls_dynamic_gnu2_64"
+ [(set (match_dup 2)
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))
+ (parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+{
+ operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_TLSDESC))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}"
+ [(set_attr "type" "lea")
+ (set_attr "mode" "DI")
+ (set_attr "length" "7")
+ (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+ (match_operand:DI 2 "register_operand" "0")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+ [(set_attr "type" "call")
+ (set_attr "length" "2")
+ (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+ [(set (match_operand:DI 0 "register_operand" "=&a")
+ (plus:DI
+ (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+ (match_operand:DI 3 "" "")
+ (reg:DI SP_REG)]
+ UNSPEC_TLSDESC)
+ (const:DI (unspec:DI
+ [(match_operand:DI 1 "tls_symbolic_operand" "")]
+ UNSPEC_DTPOFF))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_GNU2_TLS"
+ "#"
+ ""
+ [(set (match_dup 0) (match_dup 4))]
+{
+ operands[4] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;;
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3. There are three patterns for each of DFmode and
+;; SFmode. The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion. The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target mode
+;; is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address instructions,
+;; so use special patterns for add and mul.
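+;;
+;; For instance, with -mfpmath=387 and TARGET_USE_SIMODE_FIOP, the
+;; *fop_df_3_i387 pattern lets
+;;   double f (double x, int i) { return x + i; }
+;; feed the integer operand to the i387 directly (fiaddl) instead of
+;; going through a separate fild/faddl pair.  (Illustrative only.)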
+
+(define_insn "*fop_<mode>_comm_mixed_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "prefix" "orig,maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (eq_attr "alternative" "1")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd"))
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop"))))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (const_string "sseadd")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))]
+ "TARGET_80387
+ && COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_mixed_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "prefix" "orig,orig,maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_mixed"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "mult_operator" ""))
+ (const_string "ssemul")
+ (and (eq_attr "alternative" "2")
+ (match_operand:MODEF 3 "div_operator" ""))
+ (const_string "ssediv")
+ (eq_attr "alternative" "2")
+ (const_string "sseadd")
+ (match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*rcpsf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP))]
+ "TARGET_SSE_MATH"
+ "%vrcpss\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_<mode>_1_avx"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_1_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "ssemul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "ssediv")
+ ]
+ (const_string "sseadd")))
+ (set_attr "mode" "<MODE>")])
+
+;; This pattern is not fully shadowed by the pattern above.
+(define_insn "*fop_<mode>_1_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm")
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))]
+ "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ && !COMMUTATIVE_ARITH_P (operands[3])
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+;; ??? Add SSE splitters for these!
+(define_insn "*fop_<MODEF:mode>_2_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(float:MODEF
+ (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:MODEF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+ && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<X87MODEI12:MODE>")])
+
+(define_insn "*fop_<MODEF:mode>_3_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+ (match_operator:MODEF 3 "binary_fp_operator"
+ [(match_operand:MODEF 1 "register_operand" "0,0")
+ (float:MODEF
+ (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+ && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:MODEF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:MODEF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_df_4_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+ (match_operand:DF 2 "register_operand" "0,f")]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_5_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(match_operand:DF 1 "register_operand" "0,f")
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_df_6_i387"
+ [(set (match_operand:DF 0 "register_operand" "=f,f")
+ (match_operator:DF 3 "binary_fp_operator"
+ [(float_extend:DF
+ (match_operand:SF 1 "register_operand" "0,f"))
+ (float_extend:DF
+ (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:DF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:DF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "SF")])
+
+(define_insn "*fop_xf_comm_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "%0")
+ (match_operand:XF 2 "register_operand" "f")]))]
+ "TARGET_80387
+ && COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (if_then_else (match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (const_string "fop")))
+ (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_1_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,f")
+ (match_operand:XF 2 "register_operand" "f,0")]))]
+ "TARGET_80387
+ && !COMMUTATIVE_ARITH_P (operands[3])"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "XF")])
+
+(define_insn "*fop_xf_2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float:XF
+ (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+ (match_operand:XF 2 "register_operand" "0,0")]))]
+ "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,0")
+ (float:XF
+ (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+ "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+ "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "fp_int_src" "true")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float_extend:XF
+ (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
+ (match_operand:XF 2 "register_operand" "0,f")]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_5_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(match_operand:XF 1 "register_operand" "0,f")
+ (float_extend:XF
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_xf_6_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (match_operator:XF 3 "binary_fp_operator"
+ [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0,f"))
+ (float_extend:XF
+ (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+ "TARGET_80387"
+ "* return output_387_binary_op (insn, operands);"
+ [(set (attr "type")
+ (cond [(match_operand:XF 3 "mult_operator" "")
+ (const_string "fmul")
+ (match_operand:XF 3 "div_operator" "")
+ (const_string "fdiv")
+ ]
+ (const_string "fop")))
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "binary_fp_operator"
+ [(float (match_operand:X87MODEI12 1 "register_operand" ""))
+ (match_operand 2 "register_operand" "")]))]
+ "reload_completed
+ && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
+ [(const_int 0)]
+{
+ operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
+ operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (GET_CODE (operands[3]),
+ GET_MODE (operands[3]),
+ operands[4],
+ operands[2])));
+ ix86_free_from_memory (GET_MODE (operands[1]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "binary_fp_operator"
+ [(match_operand 1 "register_operand" "")
+ (float (match_operand:X87MODEI12 2 "register_operand" ""))]))]
+ "reload_completed
+ && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
+ [(const_int 0)]
+{
+ operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
+ operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ gen_rtx_fmt_ee (GET_CODE (operands[3]),
+ GET_MODE (operands[3]),
+ operands[1],
+ operands[4])));
+ ix86_free_from_memory (GET_MODE (operands[2]));
+ DONE;
+})
+
+;; FPU special functions.
+
+;; This pattern implements a no-op XFmode truncation for
+;; all fancy i386 XFmode math functions.
+
+(define_insn "truncxf<mode>2_i387_noop_unspec"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_TRUNC_NOOP))]
+ "TARGET_USE_FANCY_MATH_387"
+ "* return output_387_reg_move (insn, operands);"
+ [(set_attr "type" "fmov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sqrtxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "sqrt_extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (sqrt:XF
+ (float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fsqrt"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_insn "*rsqrtsf2_sse"
+ [(set (match_operand:SF 0 "register_operand" "=x")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
+ "%vrsqrtss\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SF")])
+
+(define_expand "rsqrtsf2"
+ [(set (match_operand:SF 0 "register_operand" "")
+ (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")]
+ UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
+ DONE;
+})
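+
+;; ix86_emit_swsqrtsf with its recip argument set expands 1/sqrt(x) in
+;; software: the rsqrtss estimate (good to roughly 12 bits) is refined by
+;; one Newton-Raphson step, approximately
+;;   x1 = x0 * (1.5 - 0.5 * a * x0 * x0).
+;; This is only a sketch; the exact sequence is built in ix86_emit_swsqrtsf.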
+
+(define_insn "*sqrt<mode>2_sse"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (sqrt:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")])
+
+(define_expand "sqrt<mode>2"
+ [(set (match_operand:MODEF 0 "register_operand" "")
+ (sqrt:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "")))]
+ "TARGET_USE_FANCY_MATH_387
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ if (<MODE>mode == SFmode
+ && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
+ DONE;
+ }
+
+ if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+ emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+ DONE;
+ }
+})
+
+(define_insn "fpremxf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM_U))
+ (set (reg:CCFP FPSR_REG)
+ (unspec:CCFP [(match_dup 2) (match_dup 3)]
+ UNSPEC_C2_FLAG))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fprem"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "fmodxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "general_operand" ""))
+ (use (match_operand:XF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_move_insn (op2, operands[2]);
+ emit_move_insn (op1, operands[1]);
+
+ emit_label (label);
+ emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
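+
+;; fprem computes only a partial remainder, reducing the exponent
+;; difference by at most 63 bits per step and setting the C2 status flag
+;; while the reduction is incomplete, hence the loop above:
+;;   do
+;;     op1 = fprem (op1, op2);
+;;   while (C2 is still set);
+;; ix86_emit_fp_unordered_jump branches back while that flag is set
+;; (via the FPU status word).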
+
+(define_expand "fmod<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:MODEF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+ emit_label (label);
+ emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ /* Truncate the result properly for strict SSE math. */
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !TARGET_MIX_SSE_I387)
+ emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+ else
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
+ DONE;
+})
+
+(define_insn "fprem1xf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FPREM1_F))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FPREM1_U))
+ (set (reg:CCFP FPSR_REG)
+ (unspec:CCFP [(match_dup 2) (match_dup 3)]
+ UNSPEC_C2_FLAG))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fprem1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "remainderxf3"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "general_operand" ""))
+ (use (match_operand:XF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_move_insn (op2, operands[2]);
+ emit_move_insn (op1, operands[1]);
+
+ emit_label (label);
+ emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
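+
+;; Note the difference from fmod above: fprem1 rounds the implicit
+;; quotient to nearest (even), so this computes the IEEE 754 remainder,
+;; whereas fprem truncates the quotient toward zero.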
+
+(define_expand "remainder<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:MODEF 2 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387"
+{
+ rtx label = gen_label_rtx ();
+
+ rtx op1 = gen_reg_rtx (XFmode);
+ rtx op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+ emit_label (label);
+
+ emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+ ix86_emit_fp_unordered_jump (label);
+ LABEL_NUSES (label) = 1;
+
+ /* Truncate the result properly for strict SSE math. */
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !TARGET_MIX_SSE_I387)
+ emit_insn (gen_truncxf<mode>2 (operands[0], op1));
+ else
+ emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op1));
+
+ DONE;
+})
+
+(define_insn "*sinxf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*sin_extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))]
+ UNSPEC_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsin"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cosxf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*cos_extend<mode>xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))]
+ UNSPEC_COS))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fcos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+;; When the sincos pattern is defined, the sin and cos built-in functions
+;; are expanded to the sincos pattern with one of its outputs left unused.
+;; The CSE pass will figure out whether two sincos patterns can be
+;; combined; otherwise the sincos pattern is split back into a sin or cos
+;; pattern, depending on which output is unused.
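+;;
+;; For example, with -funsafe-math-optimizations
+;;   void f (double x, double *s, double *c) { *s = sin (x); *c = cos (x); }
+;; should end up using a single fsincos rather than separate fsin and fcos.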
+
+(define_insn "sincosxf3"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]
+ "")
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]
+ "")
+
+(define_insn "sincos_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "0"))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fsincos"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" ""))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]
+ "")
+
+(define_split
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" ""))]
+ UNSPEC_SINCOS_COS))
+ (set (match_operand:XF 1 "register_operand" "")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+ "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+ && !(reload_completed || reload_in_progress)"
+ [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]
+ "")
+
+(define_expand "sincos<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))
+ (use (match_operand:MODEF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_sincos_extend<mode>xf3_i387 (op0, op1, operands[2]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[1], op1));
+ DONE;
+})
+
+(define_insn "fptanxf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (match_operand:XF 3 "const_double_operand" "F"))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && standard_80387_constant_p (operands[3]) == 2"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fptan_extend<mode>xf4_i387"
+ [(set (match_operand:MODEF 0 "register_operand" "=f")
+ (match_operand:MODEF 3 "const_double_operand" "F"))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "0"))]
+ UNSPEC_TAN))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations
+ && standard_80387_constant_p (operands[3]) == 2"
+ "fptan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "tanxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx one = gen_reg_rtx (XFmode);
+ rtx op2 = CONST1_RTX (XFmode); /* fld1 */
+
+ emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2));
+ DONE;
+})
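+
+;; fptan replaces ST(0) with tan(ST(0)) and then pushes 1.0, so the insn
+;; patterns model the pushed constant as a separate set: the expanders
+;; pass CONST1_RTX, and the insns require
+;; standard_80387_constant_p (operands[3]) == 2, the fld1 constant.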
+
+(define_expand "tan<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx one = gen_reg_rtx (<MODE>mode);
+ rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */
+
+ emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0,
+ operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "*fpatanxf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fpatan_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "u"))]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fpatan"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "atan2xf3"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "")
+ (match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "")
+
+(define_expand "atan2<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))
+ (use (match_operand:MODEF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "atanxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 2)
+ (match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "atan<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (op2, CONST1_RTX (<MODE>mode)); /* fld1 */
+
+ emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "asinxf2"
+ [(set (match_dup 2)
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 1)))
+ (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+ (set (match_dup 5) (sqrt:XF (match_dup 4)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 5) (match_dup 1)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 6 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 2; i < 6; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
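+
+;; The expansion above computes asin(x) = atan (x / sqrt (1 - x*x)); the
+;; division is folded into fpatan, and the 1.0 comes from fld1.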
+
+(define_expand "asin<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_asinxf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "acosxf2"
+ [(set (match_dup 2)
+ (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 1)))
+ (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+ (set (match_dup 5) (sqrt:XF (match_dup 4)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1) (match_dup 5)]
+ UNSPEC_FPATAN))
+ (clobber (match_scratch:XF 6 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 2; i < 6; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
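+
+;; The same identity with the fpatan operands swapped:
+;; acos(x) = atan (sqrt (1 - x*x) / x).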
+
+(define_expand "acos<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_acosxf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fyl2xxf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fyl2x"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fyl2x_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fyl2x"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "logxf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */
+})
+
+(define_expand "log<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */
+
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "log10xf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */
+})
+
+(define_expand "log10<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */
+
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "log2xf2"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 2)] UNSPEC_FYL2X))
+ (clobber (match_scratch:XF 3 ""))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+ emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+(define_expand "log2<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
+
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fyl2xp1xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2XP1))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fyl2xp1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fyl2xp1_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:XF 2 "register_operand" "u")]
+ UNSPEC_FYL2XP1))
+ (clobber (match_scratch:XF 3 "=2"))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fyl2xp1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "log1pxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ ix86_emit_i387_log1p (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "log1p<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+
+ operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
+
+ ix86_emit_i387_log1p (op0, operands[1]);
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fxtractxf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fxtract"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "fxtract_extend<mode>xf3_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(float_extend:XF
+ (match_operand:MODEF 2 "register_operand" "0"))]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "fxtract"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "logbxf2"
+ [(parallel [(set (match_dup 2)
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_XTRACT_FRACT))
+ (set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ operands[2] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "logb<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1));
+ DONE;
+})
+
+(define_expand "ilogbxf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+ DONE;
+})
+
+(define_expand "ilogb<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+ DONE;
+})
+
+(define_insn "*f2xm1xf2_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_F2XM1))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "f2xm1"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_insn "*fscalexf4_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+ (match_operand:XF 3 "register_operand" "1")]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_operand:XF 1 "register_operand" "=u")
+ (unspec:XF [(match_dup 2) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fscale"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "expNcorexf3"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 9)
+ (unspec:XF [(match_dup 8) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 3; i < 10; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */
+})
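+
+;; The shared core evaluates b^x as 2^(x * log2 b): t = x * log2 b is
+;; split by frndint into i = round(t) and f = t - i (so |f| < 1, within
+;; f2xm1's domain), f2xm1 yields 2^f - 1, fld1 adds the 1 back, and
+;; fscale multiplies by 2^i.  The callers pass log2 e (fldl2e) for exp,
+;; log2 10 (fldl2t) for exp10, and 1.0 for exp2.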
+
+(define_expand "expxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
+
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "exp<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_expxf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "exp10xf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
+
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "exp10<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_exp10xf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "exp2xf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ rtx op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
+
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+ DONE;
+})
+
+(define_expand "exp2<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_exp2xf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "expm1xf2"
+ [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+ (match_dup 2)))
+ (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+ (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+ (set (match_dup 9) (float_extend:XF (match_dup 13)))
+ (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+ (parallel [(set (match_dup 7)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 8)
+ (unspec:XF [(match_dup 6) (match_dup 4)]
+ UNSPEC_FSCALE_EXP))])
+ (parallel [(set (match_dup 10)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 11)
+ (unspec:XF [(match_dup 9) (match_dup 8)]
+ UNSPEC_FSCALE_EXP))])
+ (set (match_dup 12) (minus:XF (match_dup 10)
+ (float_extend:XF (match_dup 13))))
+ (set (match_operand:XF 0 "register_operand" "")
+ (plus:XF (match_dup 12) (match_dup 7)))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ int i;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ for (i = 2; i < 13; i++)
+ operands[i] = gen_reg_rtx (XFmode);
+
+ operands[13]
+ = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */
+
+ emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
+})
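+
+;; expm1 is rearranged to stay accurate near zero: with t = x * log2 e
+;; split into i = round(t) and f = t - i as above, the result is
+;;   (2^f - 1) * 2^i + (2^i - 1)
+;; so the "- 1" never cancels against the f2xm1 term (for small x,
+;; i = 0 and the second addend vanishes).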
+
+(define_expand "expm1<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_expm1xf2 (op0, op1));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "ldexpxf3"
+ [(set (match_dup 3)
+ (float:XF (match_operand:SI 2 "register_operand" "")))
+ (parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_dup 3)]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 4)
+ (unspec:XF [(match_dup 1) (match_dup 3)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ operands[3] = gen_reg_rtx (XFmode);
+ operands[4] = gen_reg_rtx (XFmode);
+})
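+
+;; fscale computes ST(0) * 2^trunc(ST(1)) and leaves ST(1) untouched;
+;; the SImode exponent is first converted to XFmode, and the unchanged
+;; copy comes back as scratch operand 4.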
+
+(define_expand "ldexp<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_expand "scalbxf3"
+ [(parallel [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")
+ (match_operand:XF 2 "register_operand" "")]
+ UNSPEC_FSCALE_FRACT))
+ (set (match_dup 3)
+ (unspec:XF [(match_dup 1) (match_dup 2)]
+ UNSPEC_FSCALE_EXP))])]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ operands[3] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "scalb<mode>3"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "general_operand" ""))
+ (use (match_operand:MODEF 2 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0, op1, op2;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ op2 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+ emit_insn (gen_scalbxf3 (op0, op1, op2));
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+
+(define_insn "sse4_1_round<mode>2"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_ROUND"
+ "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "rintxf2"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "frndint"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "XF")])
+
+(define_expand "rint<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)
+ {
+ if (!TARGET_ROUND && optimize_insn_for_size_p ())
+ FAIL;
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x04)));
+ else
+ ix86_expand_rint (operand0, operand1);
+ }
+ else
+ {
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_rintxf2 (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
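+
+;; The 0x04 immediate is ROUND_CUR_DIRECTION: round using the current
+;; MXCSR rounding mode, which is precisely rint's contract.  Without
+;; SSE4.1, ix86_expand_rint falls back to the usual
+;; |x| + 2^52 - 2^52 trick (2^23 for SFmode) with the sign copied back
+;; (a rough description).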
+
+(define_expand "round<mode>2"
+ [(match_operand:MODEF 0 "register_operand" "")
+ (match_operand:MODEF 1 "nonimmediate_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math && !flag_rounding_math"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_round (operand0, operand1);
+ else
+ ix86_expand_rounddf_32 (operand0, operand1);
+ DONE;
+})
+
+(define_insn_and_split "*fistdi2_1"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fistdi2 (operands[0], operands[1]));
+ else
+ {
+ operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+ emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+ operands[2]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))
+ (clobber (match_scratch:XF 2 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387"
+ "#"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:DI 2 "memory_operand" ""))
+ (clobber (match_scratch 3 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+ (clobber (match_dup 3))])]
+ "")
+
+(define_insn_and_split "*fist<mode>2_1"
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+ operands[2]));
+ DONE;
+}
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_with_temp"
+ [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+ "TARGET_USE_FANCY_MATH_387"
+ "#"
+ [(set_attr "type" "fpspc")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))
+ (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+ "reload_completed"
+ [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]
+ "")
+
+(define_expand "lrintxf<mode>2"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST))]
+ "TARGET_USE_FANCY_MATH_387"
+ "")
+
+(define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2"
+ [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+ (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")]
+ UNSPEC_FIX_NOTRUNC))]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)"
+ "")
+
+(define_expand "lround<MODEF:mode><SSEMODEI24:mode>2"
+ [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+ && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)
+ && !flag_trapping_math && !flag_rounding_math"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ ix86_expand_lround (operand0, operand1);
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
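+;; The replacement word is derived with integer arithmetic (fnstcw the
+;; current word, then mask and or in the desired rounding-control
+;; bits), which is where the clobber comes from.  A rough sketch of
+;; what frndintxf2_floor_i387 executes under:
+;;
+;;	fnstcw	%2		; save the current control word
+;;	...			; integer ops build the floor word in %3
+;;	fldcw	%3		; switch to round-down
+;;	frndint
+;;	fldcw	%2		; restore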
+(define_insn_and_split "frndintxf2_floor"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_FLOOR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+
+ emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_floor_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "XF")])
+
+(define_expand "floorxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "floor<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+ {
+ if (!TARGET_ROUND && optimize_insn_for_size_p ())
+ FAIL;
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x01)));
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_floorceil (operand0, operand1, true);
+ else
+ ix86_expand_floorceildf_32 (operand0, operand1, true);
+ }
+ else
+ {
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_floor (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_floor_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fist<mode>2_floor (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_floor"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_floor_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])]
+ "")
+
+(define_insn "fist<mode>2_floor"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_floor_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "floor")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_FLOOR))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ "")
+
+(define_expand "lfloorxf<mode>2"
+ [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_FLOOR))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "")
+
+(define_expand "lfloor<mode>di2"
+ [(match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+ && !flag_trapping_math"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ ix86_expand_lfloorceil (operand0, operand1, true);
+ DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+ [(match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math"
+{
+ if (optimize_insn_for_size_p () && TARGET_64BIT)
+ FAIL;
+ ix86_expand_lfloorceil (operand0, operand1, true);
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_ceil"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_CEIL))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+ emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_ceil_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "XF")])
+
+(define_expand "ceilxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "ceil<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+ {
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x02)));
+ else if (optimize_insn_for_size_p ())
+ FAIL;
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_floorceil (operand0, operand1, false);
+ else
+ ix86_expand_floorceildf_32 (operand0, operand1, false);
+ }
+ else
+ {
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_ceil_1"
+ [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+ if (memory_operand (operands[0], VOIDmode))
+ emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ {
+ operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+ emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
+ }
+ DONE;
+}
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_ceil"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))
+ (clobber (match_scratch:XF 4 "=&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_ceil_with_temp"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+ (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "DI")])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "memory_operand" "")
+ (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:DI 4 "memory_operand" ""))
+ (clobber (match_scratch 5 ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))
+ (clobber (match_dup 5))])]
+ "")
+
+(define_insn "fist<mode>2_ceil"
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "* return output_fix_trunc (insn, operands, 0);"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_ceil_with_temp"
+ [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" "m,m"))
+ (use (match_operand:HI 3 "memory_operand" "m,m"))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "#"
+ [(set_attr "type" "fistp")
+ (set_attr "i387_cw" "ceil")
+ (set_attr "mode" "<MODE>")])
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "register_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))])
+ (set (match_dup 0) (match_dup 4))]
+ "")
+
+(define_split
+ [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+ (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (use (match_operand:HI 2 "memory_operand" ""))
+ (use (match_operand:HI 3 "memory_operand" ""))
+ (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+ "reload_completed"
+ [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+ UNSPEC_FIST_CEIL))
+ (use (match_dup 2))
+ (use (match_dup 3))])]
+ "")
+
+(define_expand "lceilxf<mode>2"
+ [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+ (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FIST_CEIL))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_USE_FANCY_MATH_387
+ && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+ "")
+
+(define_expand "lceil<mode>di2"
+ [(match_operand:DI 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+ && !flag_trapping_math"
+{
+ ix86_expand_lfloorceil (operand0, operand1, false);
+ DONE;
+})
+
+(define_expand "lceil<mode>si2"
+ [(match_operand:SI 0 "nonimmediate_operand" "")
+ (match_operand:MODEF 1 "register_operand" "")]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math"
+{
+ ix86_expand_lfloorceil (operand0, operand1, false);
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_trunc"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_TRUNC))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+
+ emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_trunc_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_TRUNC))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "trunc")
+ (set_attr "mode" "XF")])
+
+(define_expand "btruncxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ if (optimize_insn_for_size_p ())
+ FAIL;
+ emit_insn (gen_frndintxf2_trunc (operands[0], operands[1]));
+ DONE;
+})
+
+(define_expand "btrunc<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "(TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math)"
+{
+ if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+ && !flag_trapping_math
+ && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+ {
+ if (TARGET_ROUND)
+ emit_insn (gen_sse4_1_round<mode>2
+ (operands[0], operands[1], GEN_INT (0x03)));
+ else if (optimize_insn_for_size_p ())
+ FAIL;
+ else if (TARGET_64BIT || (<MODE>mode != DFmode))
+ ix86_expand_trunc (operand0, operand1);
+ else
+ ix86_expand_truncdf_32 (operand0, operand1);
+ }
+ else
+ {
+ rtx op0, op1;
+
+ if (optimize_insn_for_size_p ())
+ FAIL;
+
+ op0 = gen_reg_rtx (XFmode);
+ op1 = gen_reg_rtx (XFmode);
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_trunc (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ }
+ DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_mask_pm"
+ [(set (match_operand:XF 0 "register_operand" "")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+ UNSPEC_FRNDINT_MASK_PM))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ ix86_optimize_mode_switching[I387_MASK_PM] = 1;
+
+ operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+ operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
+
+ emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "mask_pm")
+ (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_mask_pm_i387"
+ [(set (match_operand:XF 0 "register_operand" "=f")
+ (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+ UNSPEC_FRNDINT_MASK_PM))
+ (use (match_operand:HI 2 "memory_operand" "m"))
+ (use (match_operand:HI 3 "memory_operand" "m"))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+ "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+ [(set_attr "type" "frndint")
+ (set_attr "i387_cw" "mask_pm")
+ (set_attr "mode" "XF")])
+
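+;; nearbyint is rint minus the inexact exception.  The mask_pm variant
+;; above gets that effect by loading a control word with the precision
+;; (inexact) exception masked and clearing the sticky exception flags
+;; with fclex before restoring the original word.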
+(define_expand "nearbyintxf2"
+ [(use (match_operand:XF 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && flag_unsafe_math_optimizations"
+{
+ emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_expand "nearbyint<mode>2"
+ [(use (match_operand:MODEF 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+ emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+ emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+ DONE;
+})
+
+(define_insn "fxam<mode>2_i387"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(match_operand:X87MODEF 1 "register_operand" "f")]
+ UNSPEC_FXAM))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fxam\n\tfnstsw\t%0"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "fxam<mode>2_i387_with_temp"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (unspec:HI
+ [(match_operand:MODEF 1 "memory_operand" "")]
+ UNSPEC_FXAM_MEM))]
+ "TARGET_USE_FANCY_MATH_387
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0)
+ (unspec:HI [(match_dup 2)] UNSPEC_FXAM))]
+{
+ operands[2] = gen_reg_rtx (<MODE>mode);
+
+ MEM_VOLATILE_P (operands[1]) = 1;
+}
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
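+;; fxam classifies st(0) into the C3/C2/C0 condition bits and fnstsw
+;; copies the status word to %ax, putting C0, C2 and C3 in bits 0, 2
+;; and 6 of %ah.  Masking %ah with 0x45 and comparing against 0x05
+;; (C2 and C0 set, C3 clear -- the fxam encoding for infinity) is the
+;; test the isinf expanders below perform.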
+(define_expand "isinfxf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && TARGET_C99_FUNCTIONS"
+{
+ rtx mask = GEN_INT (0x45);
+ rtx val = GEN_INT (0x05);
+
+ rtx cond;
+
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx res = gen_reg_rtx (QImode);
+
+ emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+ emit_insn (gen_cmpqi_ext_3 (scratch, val));
+ cond = gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+ emit_insn (gen_zero_extendqisi2 (operands[0], res));
+ DONE;
+})
+
+(define_expand "isinf<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:MODEF 1 "nonimmediate_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && TARGET_C99_FUNCTIONS
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ rtx mask = GEN_INT (0x45);
+ rtx val = GEN_INT (0x05);
+
+ rtx cond;
+
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx res = gen_reg_rtx (QImode);
+
+ /* Remove excess precision by forcing value through memory. */
+ if (memory_operand (operands[1], VOIDmode))
+ emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1]));
+ else
+ {
+ int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+ rtx temp = assign_386_stack_local (<MODE>mode, slot);
+
+ emit_move_insn (temp, operands[1]);
+ emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp));
+ }
+
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+ emit_insn (gen_cmpqi_ext_3 (scratch, val));
+ cond = gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+ emit_insn (gen_zero_extendqisi2 (operands[0], res));
+ DONE;
+})
+
+(define_expand "signbit<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:X87MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+ rtx mask = GEN_INT (0x0200);
+
+ rtx scratch = gen_reg_rtx (HImode);
+
+ emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1]));
+ emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask));
+ DONE;
+})
+
+;; Block operation instructions
+
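+;; String instructions honor the direction flag and assume it is clear
+;; (index registers ascend).  Expanders that emit them set
+;; ix86_current_function_needs_cld, letting the backend insert this
+;; cld when the ABI guarantee of a clear flag cannot be relied on
+;; (-mcld).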
+(define_insn "cld"
+ [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
+ ""
+ "cld"
+ [(set_attr "length" "1")
+ (set_attr "length_immediate" "0")
+ (set_attr "modrm" "0")])
+
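+;; Block move/clear standard patterns: operand 2 is the byte count,
+;; operand 3 the shared alignment, operands 4 and 5 the expected
+;; alignment and size hints.  ix86_expand_movmem (and ix86_expand_setmem
+;; further down) picks a strategy -- rep string insns, loops or a
+;; library call -- and returns false, hence FAIL, when none applies.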
+(define_expand "movmemsi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:SI 2 "nonmemory_operand" ""))
+ (use (match_operand:SI 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
+ ""
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
+ operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "movmemdi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:BLK 1 "memory_operand" ""))
+ (use (match_operand:DI 2 "nonmemory_operand" ""))
+ (use (match_operand:DI 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
+ "TARGET_64BIT"
+{
+ if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
+ operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the previous expander.
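+;; For a word-sized copy the two shapes are, roughly (illustrative
+;; 32-bit registers):
+;;
+;;	movsl			; single string op
+;; versus
+;;	movl	(%esi), %eax
+;;	movl	%eax, (%edi)
+;;	addl	$4, %esi
+;;	addl	$4, %edi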
+
+(define_expand "strmov"
+ [(set (match_dup 4) (match_operand 3 "memory_operand" ""))
+ (set (match_operand 1 "memory_operand" "") (match_dup 4))
+ (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
+
+ /* If .md ever supports :P for Pmode, these can be directly
+ in the pattern above. */
+ operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+ operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
+
+ /* Can't use this if the user has appropriated esi or edi. */
+ if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+ {
+ emit_insn (gen_strmov_singleop (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[5], operands[6]));
+ DONE;
+ }
+
+ operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
+})
+
+(define_expand "strmov_singleop"
+ [(parallel [(set (match_operand 1 "memory_operand" "")
+ (match_operand 3 "memory_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 4 "" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operand 5 "" ""))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strmovdi_rex_1"
+ [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
+ (mem:DI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 8)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 8)))]
+ "TARGET_64BIT"
+ "movsq"
+ [(set_attr "type" "str")
+ (set_attr "mode" "DI")
+ (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_1"
+ [(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
+ (mem:SI (match_operand:SI 3 "register_operand" "1")))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 2)
+ (const_int 4)))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_dup 3)
+ (const_int 4)))]
+ "!TARGET_64BIT"
+ "movs{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "mode" "SI")
+ (set_attr "memory" "both")])
+
+(define_insn "*strmovsi_rex_1"
+ [(set (mem:SI (match_operand:DI 2 "register_operand" "0"))
+ (mem:SI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 4)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 4)))]
+ "TARGET_64BIT"
+ "movs{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "mode" "SI")
+ (set_attr "memory" "both")])
+
+(define_insn "*strmovhi_1"
+ [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
+ (mem:HI (match_operand:SI 3 "register_operand" "1")))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 2)
+ (const_int 2)))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_dup 3)
+ (const_int 2)))]
+ "!TARGET_64BIT"
+ "movsw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strmovhi_rex_1"
+ [(set (mem:HI (match_operand:DI 2 "register_operand" "0"))
+ (mem:HI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 2)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 2)))]
+ "TARGET_64BIT"
+ "movsw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strmovqi_1"
+ [(set (mem:QI (match_operand:SI 2 "register_operand" "0"))
+ (mem:QI (match_operand:SI 3 "register_operand" "1")))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_dup 3)
+ (const_int 1)))]
+ "!TARGET_64BIT"
+ "movsb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "QI")])
+
+(define_insn "*strmovqi_rex_1"
+ [(set (mem:QI (match_operand:DI 2 "register_operand" "0"))
+ (mem:QI (match_operand:DI 3 "register_operand" "1")))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 2)
+ (const_int 1)))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_dup 3)
+ (const_int 1)))]
+ "TARGET_64BIT"
+ "movsb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "both")
+ (set_attr "mode" "QI")])
+
+(define_expand "rep_mov"
+ [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 5 "" ""))
+ (set (match_operand 2 "register_operand" "")
+ (match_operand 6 "" ""))
+ (set (match_operand 1 "memory_operand" "")
+ (match_operand 3 "memory_operand" ""))
+ (use (match_dup 4))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
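+;; The rep movs patterns describe the insn's whole effect: %ecx ends
+;; at zero and the final pointers are modeled as start + (count <<
+;; log2(element size)), hence the ashift by 3 or 2 below and the plain
+;; plus in the byte variants.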
+(define_insn "*rep_movdi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 3))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "TARGET_64BIT"
+ "rep movsq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "DI")])
+
+(define_insn "*rep_movsi"
+ [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2")
+ (const_int 2))
+ (match_operand:SI 3 "register_operand" "0")))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (ashift:SI (match_dup 5) (const_int 2))
+ (match_operand:SI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "!TARGET_64BIT"
+ "rep movs{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movsi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2")
+ (const_int 2))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (ashift:DI (match_dup 5) (const_int 2))
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "TARGET_64BIT"
+ "rep movs{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi"
+ [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (match_operand:SI 5 "register_operand" "2")))
+ (set (match_operand:SI 1 "register_operand" "=S")
+ (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "!TARGET_64BIT"
+ "rep movsb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_movqi_rex64"
+ [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_operand:DI 3 "register_operand" "0")
+ (match_operand:DI 5 "register_operand" "2")))
+ (set (match_operand:DI 1 "register_operand" "=S")
+ (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5)))
+ (set (mem:BLK (match_dup 3))
+ (mem:BLK (match_dup 4)))
+ (use (match_dup 5))]
+ "TARGET_64BIT"
+ "rep movsb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "both")
+ (set_attr "mode" "SI")])
+
+(define_expand "setmemsi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:SI 1 "nonmemory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
+ ""
+{
+ if (ix86_expand_setmem (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "setmemdi"
+ [(use (match_operand:BLK 0 "memory_operand" ""))
+ (use (match_operand:DI 1 "nonmemory_operand" ""))
+ (use (match_operand 2 "const_int_operand" ""))
+ (use (match_operand 3 "const_int_operand" ""))
+ (use (match_operand 4 "const_int_operand" ""))
+ (use (match_operand 5 "const_int_operand" ""))]
+ "TARGET_64BIT"
+{
+ if (ix86_expand_setmem (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4], operands[5]))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Most CPUs don't like single string operations.
+;; Handle this case here to simplify the previous expander.
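+;; For a word-sized store the two shapes are, roughly (illustrative
+;; 32-bit registers, value already in %eax):
+;;
+;;	stosl			; single string op
+;; versus
+;;	movl	%eax, (%edi)
+;;	addl	$4, %edi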
+
+(define_expand "strset"
+ [(set (match_operand 1 "memory_operand" "")
+ (match_operand 2 "register_operand" ""))
+ (parallel [(set (match_operand 0 "register_operand" "")
+ (match_dup 3))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+ operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
+
+ /* If .md ever supports :P for Pmode, this can be directly
+ in the pattern above. */
+ operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+ GEN_INT (GET_MODE_SIZE (GET_MODE
+ (operands[2]))));
+ if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+ {
+ emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ }
+})
+
+(define_expand "strset_singleop"
+ [(parallel [(set (match_operand 1 "memory_operand" "")
+ (match_operand 2 "register_operand" ""))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 3 "" ""))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strsetdi_rex_1"
+ [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:DI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 8)))]
+ "TARGET_64BIT"
+ "stosq"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*strsetsi_1"
+ [(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 1)
+ (const_int 4)))]
+ "!TARGET_64BIT"
+ "stos{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*strsetsi_rex_1"
+ [(set (mem:SI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:SI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 4)))]
+ "TARGET_64BIT"
+ "stos{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*strsethi_1"
+ [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "a"))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 1)
+ (const_int 2)))]
+ "!TARGET_64BIT"
+ "stosw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strsethi_rex_1"
+ [(set (mem:HI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:HI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 2)))]
+ "TARGET_64BIT"
+ "stosw"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "HI")])
+
+(define_insn "*strsetqi_1"
+ [(set (mem:QI (match_operand:SI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_dup 1)
+ (const_int 1)))]
+ "!TARGET_64BIT"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "*strsetqi_rex_1"
+ [(set (mem:QI (match_operand:DI 1 "register_operand" "0"))
+ (match_operand:QI 2 "register_operand" "a"))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_dup 1)
+ (const_int 1)))]
+ "TARGET_64BIT"
+ "stosb"
+ [(set_attr "type" "str")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_expand "rep_stos"
+ [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0))
+ (set (match_operand 0 "register_operand" "")
+ (match_operand 4 "" ""))
+ (set (match_operand 2 "memory_operand" "") (const_int 0))
+ (use (match_operand 3 "register_operand" ""))
+ (use (match_dup 1))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*rep_stosdi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 3))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:DI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "TARGET_64BIT"
+ "rep stosq"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "DI")])
+
+(define_insn "*rep_stossi"
+ [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1")
+ (const_int 2))
+ (match_operand:SI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:SI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "!TARGET_64BIT"
+ "rep stos{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_stossi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1")
+ (const_int 2))
+ (match_operand:DI 3 "register_operand" "0")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:SI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "TARGET_64BIT"
+ "rep stos{l|d}"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "SI")])
+
+(define_insn "*rep_stosqi"
+ [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "=D")
+ (plus:SI (match_operand:SI 3 "register_operand" "0")
+ (match_operand:SI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:QI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "!TARGET_64BIT"
+ "rep stosb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_insn "*rep_stosqi_rex64"
+ [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
+ (set (match_operand:DI 0 "register_operand" "=D")
+ (plus:DI (match_operand:DI 3 "register_operand" "0")
+ (match_operand:DI 4 "register_operand" "1")))
+ (set (mem:BLK (match_dup 3))
+ (const_int 0))
+ (use (match_operand:QI 2 "register_operand" "a"))
+ (use (match_dup 4))]
+ "TARGET_64BIT"
+ "rep stosb"
+ [(set_attr "type" "str")
+ (set_attr "prefix_rep" "1")
+ (set_attr "memory" "store")
+ (set_attr "mode" "QI")])
+
+(define_expand "cmpstrnsi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (compare:SI (match_operand:BLK 1 "general_operand" "")
+ (match_operand:BLK 2 "general_operand" "")))
+ (use (match_operand 3 "general_operand" ""))
+ (use (match_operand 4 "immediate_operand" ""))]
+ ""
+{
+ rtx addr1, addr2, out, outlow, count, countreg, align;
+
+ if (!TARGET_INLINE_COMPARES)
+ FAIL;
+
+ if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+ FAIL;
+
+ /* Can't use this if the user has appropriated esi or edi. */
+ if (fixed_regs[SI_REG] || fixed_regs[DI_REG])
+ FAIL;
+
+ out = operands[0];
+ if (!REG_P (out))
+ out = gen_reg_rtx (SImode);
+
+ addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));
+ addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0));
+ if (addr1 != XEXP (operands[1], 0))
+ operands[1] = replace_equiv_address_nv (operands[1], addr1);
+ if (addr2 != XEXP (operands[2], 0))
+ operands[2] = replace_equiv_address_nv (operands[2], addr2);
+
+ count = operands[3];
+ countreg = ix86_zero_extend_to_Pmode (count);
+
+ /* %%% Iff we are testing strict equality, we can use known alignment
+ to good advantage. This may be possible with combine, particularly
+ once cc0 is dead. */
+ align = operands[4];
+
+ if (CONST_INT_P (count))
+ {
+ if (INTVAL (count) == 0)
+ {
+ emit_move_insn (operands[0], const0_rtx);
+ DONE;
+ }
+ emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
+ operands[1], operands[2]));
+ }
+ else
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
+ else
+ emit_insn (gen_cmpsi_1 (countreg, countreg));
+ emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
+ operands[1], operands[2]));
+ }
+
+ outlow = gen_lowpart (QImode, out);
+ emit_insn (gen_cmpintqi (outlow));
+ emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
+
+ if (operands[0] != out)
+ emit_move_insn (operands[0], out);
+
+ DONE;
+})
+
+;; Produce a tri-state integer (-1, 0, 1) from condition codes.
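+;; That is, roughly, a seta/setb pair on the flags followed by a
+;; subtraction of the two QImode results (register choice
+;; illustrative):
+;;
+;;	seta	%al		; 1 if above
+;;	setb	%dl		; 1 if below
+;;	subb	%dl, %al	; yields -1, 0 or 1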
+
+(define_expand "cmpintqi"
+ [(set (match_dup 1)
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_dup 2)
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (parallel [(set (match_operand:QI 0 "register_operand" "")
+ (minus:QI (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "operands[1] = gen_reg_rtx (QImode);
+ operands[2] = gen_reg_rtx (QImode);")
+
+;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is
+;; zero. Emit extra code to make sure that a zero-length compare is EQ.
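+;; When the count is not a known nonzero constant, cmpstrnsi above
+;; first compares the count register with itself; that sets ZF, so a
+;; skipped repz cmpsb still reads back as an equal (zero-length)
+;; compare.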
+
+(define_expand "cmpstrnqi_nz_1"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand 4 "memory_operand" "")
+ (match_operand 5 "memory_operand" "")))
+ (use (match_operand 2 "register_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*cmpstrnqi_nz_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
+ (use (match_operand:SI 6 "register_operand" "2"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:SI 0 "register_operand" "=S"))
+ (clobber (match_operand:SI 1 "register_operand" "=D"))
+ (clobber (match_operand:SI 2 "register_operand" "=c"))]
+ "!TARGET_64BIT"
+ "repz cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "*cmpstrnqi_nz_rex_1"
+ [(set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
+ (use (match_operand:DI 6 "register_operand" "2"))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (clobber (match_operand:DI 0 "register_operand" "=S"))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (match_operand:DI 2 "register_operand" "=c"))]
+ "TARGET_64BIT"
+ "repz cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+;; The same, but the count is not known to be nonzero.
+
+(define_expand "cmpstrnqi_1"
+ [(parallel [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand 2 "register_operand" "")
+ (const_int 0))
+ (compare:CC (match_operand 4 "memory_operand" "")
+ (match_operand 5 "memory_operand" ""))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_dup 2))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*cmpstrnqi_1"
+ [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:SI 5 "register_operand" "1")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand:SI 0 "register_operand" "=S"))
+ (clobber (match_operand:SI 1 "register_operand" "=D"))
+ (clobber (match_operand:SI 2 "register_operand" "=c"))]
+ "!TARGET_64BIT"
+ "repz cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "*cmpstrnqi_rex_1"
+ [(set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
+ (mem:BLK (match_operand:DI 5 "register_operand" "1")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand:DI 0 "register_operand" "=S"))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (match_operand:DI 2 "register_operand" "=c"))]
+ "TARGET_64BIT"
+ "repz cmpsb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
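+;; strlen standard pattern: operand 1 is the string, operand 2 the
+;; (constant) byte searched for and operand 3 the known alignment.
+;; ix86_expand_strlen typically emits either the repnz scasb patterns
+;; below or an open-coded word-at-a-time loop, and returns false
+;; (hence FAIL) when the library call is preferable.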
+(define_expand "strlensi"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+ ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "strlendi"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (unspec:DI [(match_operand:BLK 1 "general_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
+ (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))]
+ ""
+{
+ if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "strlenqi_1"
+ [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ "ix86_current_function_needs_cld = 1;")
+
+(define_insn "*strlenqi_1"
+ [(set (match_operand:SI 0 "register_operand" "=&c")
+ (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1"))
+ (match_operand:QI 2 "register_operand" "a")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS))
+ (clobber (match_operand:SI 1 "register_operand" "=D"))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT"
+ "repnz scasb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+(define_insn "*strlenqi_rex_1"
+ [(set (match_operand:DI 0 "register_operand" "=&c")
+ (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1"))
+ (match_operand:QI 2 "register_operand" "a")
+ (match_operand:DI 3 "immediate_operand" "i")
+ (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS))
+ (clobber (match_operand:DI 1 "register_operand" "=D"))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "repnz scasb"
+ [(set_attr "type" "str")
+ (set_attr "mode" "QI")
+ (set_attr "prefix_rep" "1")])
+
+;; Peephole optimizations to clean up after cmpstrn*. This should be
+;; handled in combine, but it is not currently up to the task.
+;; When used for their truth value, the cmpstrn* expanders generate
+;; code like this:
+;;
+;; repz cmpsb
+;; seta %al
+;; setb %dl
+;; cmpb %al, %dl
+;; jcc label
+;;
+;; The intermediate three instructions are unnecessary.
+
+;; This one handles cmpstrn*_nz_1...
+(define_peephole2
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+ (mem:BLK (match_operand 5 "register_operand" ""))))
+ (use (match_operand 6 "register_operand" ""))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))])
+ (set (match_operand:QI 7 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_operand:QI 8 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (reg FLAGS_REG)
+ (compare (match_dup 7) (match_dup 8)))
+ ]
+ "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (compare:CC (mem:BLK (match_dup 4))
+ (mem:BLK (match_dup 5))))
+ (use (match_dup 6))
+ (use (match_dup 3))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+;; ...and this one handles cmpstrn*_1.
+(define_peephole2
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_operand 6 "register_operand" "")
+ (const_int 0))
+ (compare:CC (mem:BLK (match_operand 4 "register_operand" ""))
+ (mem:BLK (match_operand 5 "register_operand" "")))
+ (const_int 0)))
+ (use (match_operand:SI 3 "immediate_operand" ""))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_operand 0 "register_operand" ""))
+ (clobber (match_operand 1 "register_operand" ""))
+ (clobber (match_operand 2 "register_operand" ""))])
+ (set (match_operand:QI 7 "register_operand" "")
+ (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (match_operand:QI 8 "register_operand" "")
+ (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+ (set (reg FLAGS_REG)
+ (compare (match_dup 7) (match_dup 8)))
+ ]
+ "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+ [(parallel[
+ (set (reg:CC FLAGS_REG)
+ (if_then_else:CC (ne (match_dup 6)
+ (const_int 0))
+ (compare:CC (mem:BLK (match_dup 4))
+ (mem:BLK (match_dup 5)))
+ (const_int 0)))
+ (use (match_dup 3))
+ (use (reg:CC FLAGS_REG))
+ (clobber (match_dup 0))
+ (clobber (match_dup 1))
+ (clobber (match_dup 2))])]
+ "")
+
+
+
+;; Conditional move instructions.
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "general_operand" "")
+ (match_operand:DI 3 "general_operand" "")))]
+ "TARGET_64BIT"
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "x86_movdicc_0_m1_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int -1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "sbb{q}\t%0, %0"
+ ; Since we don't have the proper number of operands for an alu insn,
+ ; fill in all the blanks.
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*x86_movdicc_0_m1_se"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int 1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{q}\t%0, %0"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "DI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movdicc_c_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:DI 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_64BIT && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "general_operand" "")
+ (match_operand:SI 3 "general_operand" "")))]
+ ""
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
+;; the register first winds up with `sbbl $0,reg', which is also weird.
+;; So just document what we're doing explicitly.
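+;; For example (illustrative, register choice arbitrary): after a compare
+;; that sets the carry flag,
+;;	sbbl %eax, %eax		; %eax = %eax - %eax - CF = (CF ? -1 : 0)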
+
+(define_insn "x86_movsicc_0_m1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int -1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{l}\t%0, %0"
+ ; Since we don't have the proper number of operands for an alu insn,
+ ; fill in all the blanks.
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*x86_movsicc_0_m1_se"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "")
+ (const_int 1)
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "sbb{l}\t%0, %0"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "memory" "none")
+ (set_attr "imm_disp" "false")
+ (set_attr "mode" "SI")
+ (set_attr "length_immediate" "0")])
+
+(define_insn "*movsicc_noc"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:SI 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
+(define_expand "movhicc"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (if_then_else:HI (match_operand 1 "comparison_operator" "")
+ (match_operand:HI 2 "general_operand" "")
+ (match_operand:HI 3 "general_operand" "")))]
+ "TARGET_HIMODE_MATH"
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "*movhicc_noc"
+ [(set (match_operand:HI 0 "register_operand" "=r,r")
+ (if_then_else:HI (match_operator 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:HI 2 "nonimmediate_operand" "rm,0")
+ (match_operand:HI 3 "nonimmediate_operand" "0,rm")))]
+ "TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "HI")])
+
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (if_then_else:QI (match_operand 1 "comparison_operator" "")
+ (match_operand:QI 2 "general_operand" "")
+ (match_operand:QI 3 "general_operand" "")))]
+ "TARGET_QIMODE_MATH"
+ "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+(define_insn_and_split "*movqicc_noc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r")
+ (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:QI 2 "register_operand" "r,0")
+ (match_operand:QI 3 "register_operand" "0,r")))]
+ "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 2)
+ (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);"
+ [(set_attr "type" "icmov")
+ (set_attr "mode" "SI")])
+
+(define_expand "mov<mode>cc"
+ [(set (match_operand:X87MODEF 0 "register_operand" "")
+ (if_then_else:X87MODEF
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:X87MODEF 2 "register_operand" "")
+ (match_operand:X87MODEF 3 "register_operand" "")))]
+ "(TARGET_80387 && TARGET_CMOVE)
+ || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+ "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
+
+(define_insn "*movsfcc_1_387"
+ [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
+ (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "TARGET_80387 && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+ (set_attr "mode" "SF,SF,SI,SI")])
+
+(define_insn "*movdfcc_1"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ #
+ #"
+ [(set_attr "type" "fcmov,fcmov,multi,multi")
+ (set_attr "mode" "DF")])
+
+(define_insn "*movdfcc_1_rex64"
+ [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+ (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+ "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}
+ cmov%O2%C1\t{%2, %0|%0, %2}
+ cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov")
+ (set_attr "mode" "DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
+ (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+ [(match_operand 4 "flags_reg_operand" "")
+ (const_int 0)])
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (match_operand:DF 3 "nonimmediate_operand" "")))]
+ "!TARGET_64BIT && reload_completed"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 5)
+ (match_dup 6)))
+ (set (match_dup 3)
+ (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
+ (match_dup 7)
+ (match_dup 8)))]
+ "split_di (&operands[2], 2, &operands[5], &operands[7]);
+ split_di (&operands[0], 1, &operands[2], &operands[3]);")
+
+(define_insn "*movxfcc_1"
+ [(set (match_operand:XF 0 "register_operand" "=f,f")
+ (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:XF 2 "register_operand" "f,0")
+ (match_operand:XF 3 "register_operand" "0,f")))]
+ "TARGET_80387 && TARGET_CMOVE"
+ "@
+ fcmov%F1\t{%2, %0|%0, %2}
+ fcmov%f1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov")
+ (set_attr "mode" "XF")])
+
+;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict
+;; the scalar versions to have only XMM registers as operands.
+
+;; SSE5 conditional move
+(define_insn "*sse5_pcmov_<mode>"
+ [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+ (if_then_else:MODEF
+ (match_operand:MODEF 1 "register_operand" "x,0")
+ (match_operand:MODEF 2 "register_operand" "0,x")
+ (match_operand:MODEF 3 "register_operand" "x,x")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
+ [(set_attr "type" "sse4arg")])
+
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
+;; are undefined in this condition, we're certain this is correct.
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smaxmin:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (smaxmin:MODEF
+ (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "<maxminfprefix>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
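+;;
+;; For example (illustrative consequences of the semantics above): with
+;; min = (op1 < op2 ? op1 : op2),
+;;	min(-0.0, +0.0) = +0.0	(the comparison is false, op2 is returned)
+;;	min(x, NaN)	= NaN	(unordered compares false, op2 is returned)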
+
+(define_insn "*avx_ieee_smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "vmins<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_ieee_smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "vmaxs<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+ "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+;; Make two stack loads independent:
+;; fld aa fld aa
+;; fld %st(0) -> fld bb
+;; fmul bb fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+ [(set (match_operand 0 "fp_register_operand" "")
+ (match_operand 1 "fp_register_operand" ""))
+ (set (match_dup 0)
+ (match_operator 2 "binary_fp_operator"
+ [(match_dup 0)
+ (match_operand 3 "memory_operand" "")]))]
+ "REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 0) (match_dup 4))]
+
+ ;; The % modifier is not operational anymore in peephole2's, so we have to
+ ;; swap the operands manually in the case of addition and multiplication.
+ "if (COMMUTATIVE_ARITH_P (operands[2]))
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ operands[0], operands[1]);
+ else
+ operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ operands[1], operands[0]);")
+
+;; Conditional addition patterns
+(define_expand "add<mode>cc"
+ [(match_operand:SWI 0 "register_operand" "")
+ (match_operand 1 "comparison_operator" "")
+ (match_operand:SWI 2 "register_operand" "")
+ (match_operand:SWI 3 "const_int_operand" "")]
+ ""
+ "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
+
+
+;; Misc patterns (?)
+
+;; This pattern exists to put a dependency on all ebp-based memory accesses.
+;; Otherwise there will be nothing to keep
+;;
+;; [(set (reg ebp) (reg esp))]
+;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
+;; (clobber (eflags)]
+;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
+;;
+;; in proper program order.
+(define_insn "pro_epilogue_adjust_stack_1"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "0,r")
+ (match_operand:SI 2 "immediate_operand" "i,i")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ "!TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOV:
+ return "mov{l}\t{%1, %0|%0, %1}";
+
+ case TYPE_ALU:
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{l}\t{%a2, %0|%0, %a2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "0")
+ (const_string "alu")
+ (match_operand:SI 2 "const0_operand" "")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set_attr "mode" "SI")])
+
+(define_insn "pro_epilogue_adjust_stack_rex64"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e,e")))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_IMOV:
+ return "mov{q}\t{%1, %0|%0, %1}";
+
+ case TYPE_ALU:
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+
+ case TYPE_LEA:
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{q}\t{%a2, %0|%0, %a2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "type")
+ (cond [(eq_attr "alternative" "0")
+ (const_string "alu")
+ (match_operand:DI 2 "const0_operand" "")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set_attr "mode" "DI")])
+
+(define_insn "pro_epilogue_adjust_stack_rex64_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (plus:DI (match_operand:DI 1 "register_operand" "0,r")
+ (match_operand:DI 3 "immediate_operand" "i,i")))
+ (use (match_operand:DI 2 "register_operand" "r,r"))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))]
+ "TARGET_64BIT"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ALU:
+ return "add{q}\t{%2, %0|%0, %2}";
+
+ case TYPE_LEA:
+ operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]);
+ return "lea{q}\t{%a2, %0|%0, %a2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "alu,lea")
+ (set_attr "mode" "DI")])
+
+(define_insn "allocate_stack_worker_32"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")]
+ UNSPECV_STACK_PROBE))
+ (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_STACK_PROBE"
+{
+ if (flag_pic)
+ return "call\t___chkstk@PLT";
+ else
+ return "call\t___chkstk";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+(define_insn "allocate_stack_worker_64"
+ [(set (match_operand:DI 0 "register_operand" "=a")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")]
+ UNSPECV_STACK_PROBE))
+ (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1)))
+ (clobber (reg:DI R10_REG))
+ (clobber (reg:DI R11_REG))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_STACK_PROBE"
+{
+ if (flag_pic)
+ return "call\t___chkstk@PLT";
+ else
+ return "call\t___chkstk";
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "5")])
+
+(define_expand "allocate_stack"
+ [(match_operand 0 "register_operand" "")
+ (match_operand 1 "general_operand" "")]
+ "TARGET_STACK_PROBE"
+{
+ rtx x;
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT 0
+#endif
+
+ if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
+ && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
+ {
+ x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1],
+ stack_pointer_rtx, 0, OPTAB_DIRECT);
+ if (x != stack_pointer_rtx)
+ emit_move_insn (stack_pointer_rtx, x);
+ }
+ else
+ {
+ x = copy_to_mode_reg (Pmode, operands[1]);
+ if (TARGET_64BIT)
+ x = gen_allocate_stack_worker_64 (x, x);
+ else
+ x = gen_allocate_stack_worker_32 (x, x);
+ emit_insn (x);
+ }
+
+ emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+ DONE;
+})
+
+(define_expand "builtin_setjmp_receiver"
+ [(label_ref (match_operand 0 "" ""))]
+ "!TARGET_64BIT && flag_pic"
+{
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ rtx xops[3];
+ rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+ rtx label_rtx = gen_label_rtx ();
+ emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+ xops[0] = xops[1] = picreg;
+ xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
+ ix86_expand_binary_operator (MINUS, SImode, xops);
+ }
+ else
+#endif
+ emit_insn (gen_set_got (pic_offset_table_rtx));
+ DONE;
+})
+
+;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (match_operator 3 "promotable_binary_operator"
+ [(match_operand 1 "register_operand" "")
+ (match_operand 2 "aligned_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && ((GET_MODE (operands[0]) == HImode
+ && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
+ /* ??? next two lines just !satisfies_constraint_K (...) */
+ || !CONST_INT_P (operands[2])
+ || satisfies_constraint_K (operands[2])))
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ if (GET_CODE (operands[3]) != ASHIFT)
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ PUT_MODE (operands[3], SImode);")
+
+; Promote the QImode tests, as i386 has an encoding of the AND
+; instruction with a 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
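+;
+; For example (illustrative byte counts, assuming the standard encodings):
+;	andb $0x10, %bl   -> 80 e3 10	(3 bytes)
+;	andl $0x10, %ebx  -> 83 e3 10	(3 bytes, promotion is free)
+;	andb $0x10, %al   -> 24 10	(2 bytes)
+;	andl $0x10, %eax  -> 83 e0 10	(3 bytes, one byte bigger)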
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 2 "compare_operator"
+ [(and (match_operand 3 "aligned_operand" "")
+ (match_operand 4 "const_int_operand" ""))
+ (const_int 0)]))
+ (set (match_operand 1 "register_operand" "")
+ (and (match_dup 3) (match_dup 4)))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && optimize_insn_for_speed_p ()
+ && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
+ || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
+ (const_int 0)]))
+ (set (match_dup 1)
+ (and:SI (match_dup 3) (match_dup 4)))])]
+{
+ operands[4]
+ = gen_int_mode (INTVAL (operands[4])
+ & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
+ operands[1] = gen_lowpart (SImode, operands[1]);
+ operands[3] = gen_lowpart (SImode, operands[3]);
+})
+
+; Don't promote the QImode tests, as i386 doesn't have an encoding of
+; the TEST instruction with a 32-bit sign-extended immediate and thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
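+;
+; For example (illustrative byte counts, assuming the standard encodings):
+;	testb $0x10, %bl  -> f6 c3 10		(3 bytes)
+;	testl $0x10, %ebx -> f7 c3 10 00 00 00	(6 bytes, no imm8 form)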
+(define_split
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and (match_operand:HI 2 "aligned_operand" "")
+ (match_operand:HI 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && ! TARGET_FAST_PREFIX
+ && optimize_insn_for_speed_p ()
+ /* Ensure that the operand will remain sign-extended immediate. */
+ && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+ (const_int 0)]))]
+{
+ operands[3]
+ = gen_int_mode (INTVAL (operands[3])
+ & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+})
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (neg (match_operand 1 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode
+ || optimize_insn_for_size_p ())))"
+ [(parallel [(set (match_dup 0)
+ (neg:SI (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (not (match_operand 1 "register_operand" "")))]
+ "! TARGET_PARTIAL_REG_STALL && reload_completed
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode
+ || optimize_insn_for_size_p ())))"
+ [(set (match_dup 0)
+ (not:SI (match_dup 1)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+ [(set (match_operand 0 "register_operand" "")
+ (if_then_else (match_operator 1 "comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand 2 "register_operand" "")
+ (match_operand 3 "register_operand" "")))]
+ "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
+ && (GET_MODE (operands[0]) == HImode
+ || (GET_MODE (operands[0]) == QImode
+ && (TARGET_PROMOTE_QImode
+ || optimize_insn_for_size_p ())))"
+ [(set (match_dup 0)
+ (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+ "operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);")
+
+
+;; RTL peephole optimizations, run before sched2.  These primarily look to
+;; transform a complex memory operation into two memory-to-register operations.
+
+;; Don't push memory operands
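+;; For example (illustrative, the scratch register is chosen by reload):
+;;	pushl 4(%esi)	->	movl 4(%esi), %eax
+;;				pushl %eax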
+(define_peephole2
+ [(set (match_operand:SI 0 "push_operand" "")
+ (match_operand:SI 1 "memory_operand" ""))
+ (match_scratch:SI 2 "r")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "push_operand" "")
+ (match_operand:DI 1 "memory_operand" ""))
+ (match_scratch:DI 2 "r")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; We need to handle SFmode only, because DFmode and XFmode are split into
+;; SImode pushes.
+(define_peephole2
+ [(set (match_operand:SF 0 "push_operand" "")
+ (match_operand:SF 1 "memory_operand" ""))
+ (match_scratch:SF 2 "r")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "push_operand" "")
+ (match_operand:HI 1 "memory_operand" ""))
+ (match_scratch:HI 2 "r")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:QI 0 "push_operand" "")
+ (match_operand:QI 1 "memory_operand" ""))
+ (match_scratch:QI 2 "q")]
+ "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+ && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; Don't move an immediate directly to memory when the instruction
+;; gets too big.
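+;;
+;; For example (illustrative, scratch register arbitrary):
+;;	movl $0, mem	->	xorl %eax, %eax
+;;				movl %eax, mem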
+(define_peephole2
+ [(match_scratch:SI 1 "r")
+ (set (match_operand:SI 0 "memory_operand" "")
+ (const_int 0))]
+ "optimize_insn_for_speed_p ()
+ && ! TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 1) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_peephole2
+ [(match_scratch:HI 1 "r")
+ (set (match_operand:HI 0 "memory_operand" "")
+ (const_int 0))]
+ "optimize_insn_for_speed_p ()
+ && ! TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 2) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+ [(match_scratch:QI 1 "q")
+ (set (match_operand:QI 0 "memory_operand" "")
+ (const_int 0))]
+ "optimize_insn_for_speed_p ()
+ && ! TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 2) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 1))]
+ "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (set (match_operand:SI 0 "memory_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "optimize_insn_for_speed_p ()
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(match_scratch:HI 2 "r")
+ (set (match_operand:HI 0 "memory_operand" "")
+ (match_operand:HI 1 "immediate_operand" ""))]
+ "optimize_insn_for_speed_p ()
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(match_scratch:QI 2 "q")
+ (set (match_operand:QI 0 "memory_operand" "")
+ (match_operand:QI 1 "immediate_operand" ""))]
+ "optimize_insn_for_speed_p ()
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; Don't compare memory with zero; load it and use a test instead.
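+;; For example (illustrative, scratch register arbitrary):
+;;	cmpl $0, mem	->	movl mem, %eax
+;;				testl %eax, %eax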
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(match_operand:SI 2 "memory_operand" "")
+ (const_int 0)]))
+ (match_scratch:SI 3 "r")]
+ "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
+ [(set (match_dup 3) (match_dup 2))
+ (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
+ "")
+
+;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
+;; Don't split NOTs with a displacement operand, because the resulting XOR
+;; will not be pairable anyway.
+;;
+;; On AMD K6, NOT with a memory operand that cannot be represented using a
+;; single modRM byte is vector decoded.  The XOR replacement is long decoded,
+;; so this split helps here as well.
+;;
+;; Note: Can't do this as a regular split because we can't get proper
+;; lifetime information then.
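+;;
+;; For example (illustrative):
+;;	notl %eax	->	xorl $-1, %eax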
+
+(define_peephole2
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "optimize_insn_for_speed_p ()
+ && ((TARGET_NOT_UNPAIRABLE
+ && (!MEM_P (operands[0])
+ || !memory_displacement_operand (operands[0], SImode)))
+ || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode)))
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0)
+ (xor:SI (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:HI 0 "nonimmediate_operand" "")
+ (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+ "optimize_insn_for_speed_p ()
+ && ((TARGET_NOT_UNPAIRABLE
+ && (!MEM_P (operands[0])
+ || !memory_displacement_operand (operands[0], HImode)))
+ || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode)))
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0)
+ (xor:HI (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:QI 0 "nonimmediate_operand" "")
+ (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+ "optimize_insn_for_speed_p ()
+ && ((TARGET_NOT_UNPAIRABLE
+ && (!MEM_P (operands[0])
+ || !memory_displacement_operand (operands[0], QImode)))
+ || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode)))
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0)
+ (xor:QI (match_dup 1) (const_int -1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; Non-pairable "test imm, reg" instructions can be translated to
+;; "and imm, reg" if reg dies.  The "and" form is also shorter (a one-
+;; byte opcode instead of two, and it has a short form for byte operands),
+;; so do it for other CPUs as well.  Given that the value was dead,
+;; this should not create any new dependencies.  Pass on the sub-word
+;; versions if we're concerned about partial register stalls.
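+;;
+;; For example (illustrative byte counts, assuming the standard encodings):
+;; when %ebx dies,
+;;	testl $15, %ebx  (f7 c3 0f 00 00 00, 6 bytes)
+;;	  ->  andl $15, %ebx  (83 e3 0f, 3 bytes)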
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:SI (match_operand:SI 2 "register_operand" "")
+ (match_operand:SI 3 "immediate_operand" ""))
+ (const_int 0)]))]
+ "ix86_match_ccmode (insn, CCNOmode)
+ && (true_regnum (operands[2]) != AX_REG
+ || satisfies_constraint_K (operands[3]))
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+ (const_int 0)]))
+ (set (match_dup 2)
+ (and:SI (match_dup 2) (match_dup 3)))])]
+ "")
+
+;; We don't need to handle the HImode case, because it will be promoted to
+;; SImode when ! TARGET_PARTIAL_REG_STALL.
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:QI (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 3 "immediate_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL
+ && ix86_match_ccmode (insn, CCNOmode)
+ && true_regnum (operands[2]) != AX_REG
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel
+ [(set (match_dup 0)
+ (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+ (const_int 0)]))
+ (set (match_dup 2)
+ (and:QI (match_dup 2) (match_dup 3)))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(and:SI
+ (zero_extract:SI
+ (match_operand 2 "ext_register_operand" "")
+ (const_int 8)
+ (const_int 8))
+ (match_operand 3 "const_int_operand" ""))
+ (const_int 0)]))]
+ "! TARGET_PARTIAL_REG_STALL
+ && ix86_match_ccmode (insn, CCNOmode)
+ && true_regnum (operands[2]) != AX_REG
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 1
+ [(and:SI
+ (zero_extract:SI
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 3))
+ (const_int 0)]))
+ (set (zero_extract:SI (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (and:SI
+ (zero_extract:SI
+ (match_dup 2)
+ (const_int 8)
+ (const_int 8))
+ (match_dup 3)))])]
+ "")
+
+;; Don't do logical operations with memory inputs.
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_dup 0)
+ (match_operand:SI 1 "memory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_operand:SI 1 "memory_operand" "")
+ (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+ [(set (match_dup 2) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+;; Prefer Load+RegOp to Mov+MemOp.  Watch out for cases where the memory
+;; address refers to the destination of the load!
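+;;
+;; For example (illustrative, registers arbitrary):
+;;	movl %ebx, %eax		movl mem, %eax
+;;	addl mem, %eax	  ->	addl %ebx, %eax
+;; (a mem address that mentions %eax is rewritten in terms of %ebx first)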
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (match_dup 0)
+ (match_operator:SI 3 "commutative_operator"
+ [(match_dup 0)
+ (match_operand:SI 2 "memory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "REGNO (operands[0]) != REGNO (operands[1])
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[1]))"
+ [(set (match_dup 0) (match_dup 4))
+ (parallel [(set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);")
+
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" ""))
+ (set (match_dup 0)
+ (match_operator 3 "commutative_operator"
+ [(match_dup 0)
+ (match_operand 2 "memory_operand" "")]))]
+ "REGNO (operands[0]) != REGNO (operands[1])
+ && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1]))
+ || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0)
+ (match_op_dup 3 [(match_dup 0) (match_dup 1)]))]
+ "")
+
+; Don't do logical operations with memory outputs
+;
+; These two don't make sense for PPro/PII -- we're expanding a 4-uop
+; instruction into two 1-uop insns plus a 2-uop insn. That last has
+; the same decoder scheduling characteristics as the original.
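+;
+; For example (illustrative, the scratch register is chosen by reload):
+;	orl %ebx, mem	->	movl mem, %ecx
+;				orl %ebx, %ecx
+;				movl %ecx, mem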
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_dup 0)
+ (match_operand:SI 1 "nonmemory_operand" "")]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel [(set (match_dup 2)
+ (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 2 "r")
+ (parallel [(set (match_operand:SI 0 "memory_operand" "")
+ (match_operator:SI 3 "arith_or_logical_operator"
+ [(match_operand:SI 1 "nonmemory_operand" "")
+ (match_dup 0)]))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel [(set (match_dup 2)
+ (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; Attempt to always use XOR for zeroing registers.
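+;; For example (illustrative byte counts):
+;;	movl $0, %eax  (b8 00 00 00 00, 5 bytes)  ->  xorl %eax, %eax  (2 bytes)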
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "const0_operand" ""))]
+ "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+ && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+ && GENERAL_REG_P (operands[0])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[0] = gen_lowpart (word_mode, operands[0]);
+})
+
+(define_peephole2
+ [(set (strict_low_part (match_operand 0 "register_operand" ""))
+ (const_int 0))]
+ "(GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode)
+ && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg.
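+;; For example (illustrative byte counts):
+;;	movl $-1, %eax  (5 bytes)  ->  orl $-1, %eax  (83 c8 ff, 3 bytes)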
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (const_int -1))]
+ "(GET_MODE (operands[0]) == HImode
+ || GET_MODE (operands[0]) == SImode
+ || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
+ && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (const_int -1))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode,
+ operands[0]);")
+
+;; Attempt to convert simple leas to adds. These can be created by
+;; move expanders.
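+;; For example (illustrative):
+;;	leal 4(%eax), %eax	->	addl $4, %eax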
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_dup 0)
+ (match_operand:SI 1 "nonmemory_operand" "")))]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")) 0))]
+ "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])"
+ [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (match_dup 0)
+ (match_operand:DI 1 "x86_64_general_operand" "")))]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_dup 0)
+ (match_operand:SI 1 "const_int_operand" "")))]
+ "exact_log2 (INTVAL (operands[1])) >= 0
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_dup 0)
+ (match_operand:DI 1 "const_int_operand" "")))]
+ "exact_log2 (INTVAL (operands[1])) >= 0
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")) 0))]
+ "exact_log2 (INTVAL (operands[2])) >= 0
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+;; The ESP adjustments can be done by the push and pop instructions.  The
+;; resulting code is shorter, since push is only 1 byte while add imm, %esp
+;; is 3 bytes.  On many CPUs it is also faster, since special hardware to
+;; avoid esp dependencies is present.
+
+;; While some of these conversions may be done using splitters, we use
+;; peepholes in order to allow the combine_stack_adjustments pass to see
+;; non-obfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we
+;; have two choices:
+;; - use a scratch register and clobber it in order to avoid dependencies
+;; - use an already live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that the given register is live.  The first choice will also most
+;; likely result in fewer dependencies.  At the point of the esp adjustment
+;; it is very likely that the call-clobbered registers are dead.  We may want
+;; to use the base pointer as an alternative when no register is available
+;; later.
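+;;
+;; For example (illustrative, scratch register arbitrary):
+;;	subl $4, %esp	->	pushl %eax	(%eax is clobbered; its value is dead)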
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+ (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
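+;; For example (illustrative, scratch register arbitrary):
+;;	addl $4, %esp	->	popl %ecx	(the loaded value is simply discarded)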
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))])]
+ "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (match_scratch:SI 1 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p ()"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (match_scratch:SI 1 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
+ (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:SI 0 "r")
+ (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p ()"
+ [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
+ (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
+ "")
+
+;; Convert compares with 1 to shorter inc/dec operations when CF is not
+;; required and the register dies.  Similarly, a compare with 128 becomes
+;; a subtraction of -128.
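+;;
+;; For example (illustrative): when %eax dies,
+;;	cmpl $1, %eax	 ->	decl %eax
+;;	cmpl $128, %eax  ->	subl $-128, %eax	(-128 fits in an imm8)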
+(define_peephole2
+ [(set (match_operand 0 "flags_reg_operand" "")
+ (match_operator 1 "compare_operator"
+ [(match_operand 2 "register_operand" "")
+ (match_operand 3 "const_int_operand" "")]))]
+ "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_size)
+ && incdec_operand (operands[3], GET_MODE (operands[3])))
+ || (!TARGET_FUSE_CMP_AND_BRANCH
+ && INTVAL (operands[3]) == 128))
+ && ix86_match_ccmode (insn, CCGCmode)
+ && peep2_reg_dead_p (1, operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
+ (clobber (match_dup 2))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+ [(clobber (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+ [(clobber (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
+ (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))])]
+ "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (match_scratch:DI 1 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])]
+ "optimize_insn_for_size_p ()"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (mem:BLK (scratch)))])
+ (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+;; Convert esp additions to pop.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+;; The two-pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (match_scratch:DI 1 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
+ (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+(define_peephole2
+ [(match_scratch:DI 0 "r")
+ (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_size_p ()"
+ [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
+ (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
+ (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
+ "")
+
+;; Convert imul by three, five and nine into lea
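+;; For example (illustrative):
+;;	imull $3, %eax, %eax	->	leal (%eax,%eax,2), %eax
+;;	imull $5, %eax, %eax	->	leal (%eax,%eax,4), %eax
+;;	imull $9, %eax, %eax	->	leal (%eax,%eax,8), %eax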
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9"
+ [(set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "optimize_insn_for_speed_p ()
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0)
+ (plus:SI (mult:SI (match_dup 0) (match_dup 2))
+ (match_dup 0)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0)
+ (plus:DI (mult:DI (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+(define_peephole2
+ [(parallel
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_64BIT
+ && optimize_insn_for_speed_p ()
+ && (INTVAL (operands[2]) == 3
+ || INTVAL (operands[2]) == 5
+ || INTVAL (operands[2]) == 9)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0)
+ (plus:DI (mult:DI (match_dup 0) (match_dup 2))
+ (match_dup 0)))]
+ { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); })
+
+;; Imul $32bit_imm, mem, reg is vector decoded, while
+;; imul $32bit_imm, reg, reg is directly decoded.
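+;;
+;; For example (illustrative, scratch register arbitrary):
+;;	imull $100000, mem, %eax   ->	movl mem, %edx
+;;					imull $100000, %edx, %eax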
+(define_peephole2
+ [(match_scratch:DI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "memory_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+"")
+
+(define_peephole2
+ [(match_scratch:SI 3 "r")
+ (parallel [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "memory_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+ && !satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 1))
+ (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])]
+"")
+
+;; imul $8/16bit_imm, regmem, reg is vector decoded.
+;; Convert it into imul reg, reg.
+;; It would be better to force the assembler to encode the instruction
+;; using a long immediate, but there is apparently no way to do so.
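+;; For example (an illustrative sketch, with the destination equal to
+;; the source so no extra copy is needed):
+;;	imulw $8, %ax
+;; becomes
+;;	movw $8, %dx
+;;	imulw %dx, %ax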
+(define_peephole2
+ [(parallel [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "nonimmediate_operand" "")
+ (match_operand:DI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:DI 3 "r")]
+ "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
+ && satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
+(define_peephole2
+ [(parallel [(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:SI 3 "r")]
+ "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
+ && satisfies_constraint_K (operands[2])"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
+(define_peephole2
+ [(parallel [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "nonimmediate_operand" "")
+ (match_operand:HI 2 "immediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (match_scratch:HI 3 "r")]
+ "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()"
+ [(set (match_dup 3) (match_dup 2))
+ (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
+})
+
+;; After splitting up read-modify operations, array accesses with memory
+;; operands might end up in the form:
+;; sall $2, %eax
+;; movl 4(%esp), %edx
+;; addl %edx, %eax
+;; instead of the pre-split form:
+;; sall $2, %eax
+;; addl 4(%esp), %eax
+;; Turn it into:
+;; movl 4(%esp), %edx
+;; leal (%edx,%eax,4), %eax
+
+(define_peephole2
+ [(parallel [(set (match_operand 0 "register_operand" "")
+ (ashift (match_operand 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand 3 "register_operand")
+ (match_operand 4 "x86_64_general_operand" ""))
+ (parallel [(set (match_operand 5 "register_operand" "")
+ (plus (match_operand 6 "register_operand" "")
+ (match_operand 7 "register_operand" "")))
+ (clobber (reg:CC FLAGS_REG))])]
+ "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3
+ /* Validate MODE for lea. */
+ && ((!TARGET_PARTIAL_REG_STALL
+ && (GET_MODE (operands[0]) == QImode
+ || GET_MODE (operands[0]) == HImode))
+ || GET_MODE (operands[0]) == SImode
+ || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+ /* We reorder the load and the shift.  */
+ && !rtx_equal_p (operands[1], operands[3])
+ && !reg_overlap_mentioned_p (operands[0], operands[4])
+ /* The last PLUS must consist of operands 0 and 3.  */
+ && !rtx_equal_p (operands[0], operands[3])
+ && (rtx_equal_p (operands[3], operands[6])
+ || rtx_equal_p (operands[3], operands[7]))
+ && (rtx_equal_p (operands[0], operands[6])
+ || rtx_equal_p (operands[0], operands[7]))
+ /* The intermediate operand 0 must die or be the same as the output.  */
+ && (rtx_equal_p (operands[0], operands[5])
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 0) (match_dup 1))]
+{
+ enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode;
+ int scale = 1 << INTVAL (operands[2]);
+ rtx index = gen_lowpart (Pmode, operands[1]);
+ rtx base = gen_lowpart (Pmode, operands[3]);
+ rtx dest = gen_lowpart (mode, operands[5]);
+
+ operands[1] = gen_rtx_PLUS (Pmode, base,
+ gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+ if (mode != Pmode)
+ operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+ operands[0] = dest;
+})
+
+;; The call-value patterns come last so that the wildcard operand does
+;; not disrupt insn-recog's switch tables.
+
+(define_insn "*call_value_pop_0"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_pop_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
+ (match_operand:SI 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 3 "immediate_operand" "i")))]
+ "!TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ {
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+ }
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%A1";
+ else
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" ""))
+ (match_operand:SI 2 "" "")))]
+ "!TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))]
+ "TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_0_rex64_ms_sysv"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "const_int_operand" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI XMM6_REG))
+ (clobber (reg:TI XMM7_REG))
+ (clobber (reg:TI XMM8_REG))
+ (clobber (reg:TI XMM9_REG))
+ (clobber (reg:TI XMM10_REG))
+ (clobber (reg:TI XMM11_REG))
+ (clobber (reg:TI XMM12_REG))
+ (clobber (reg:TI XMM13_REG))
+ (clobber (reg:TI XMM14_REG))
+ (clobber (reg:TI XMM15_REG))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+ if (SIBLING_CALL_P (insn))
+ return "jmp\t%P1";
+ else
+ return "call\t%P1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
+ (match_operand:SI 2 "" "")))]
+ "!SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,c,d,a"))
+ (match_operand:SI 2 "" "")))]
+ "SIBLING_CALL_P (insn) && !TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "jmp\t%P1";
+ return "jmp\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+ (match_operand:DI 2 "" "")))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT
+ && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64_ms_sysv"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+ (match_operand:DI 2 "" "")))
+ (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+ (clobber (reg:TI 27))
+ (clobber (reg:TI 28))
+ (clobber (reg:TI 45))
+ (clobber (reg:TI 46))
+ (clobber (reg:TI 47))
+ (clobber (reg:TI 48))
+ (clobber (reg:TI 49))
+ (clobber (reg:TI 50))
+ (clobber (reg:TI 51))
+ (clobber (reg:TI 52))
+ (clobber (reg:DI SI_REG))
+ (clobber (reg:DI DI_REG))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+ if (constant_call_address_operand (operands[1], Pmode))
+ return "call\t%P1";
+ return "call\t%A1";
+}
+ [(set_attr "type" "callv")])
+
+(define_insn "*call_value_1_rex64_large"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+ (match_operand:DI 2 "" "")))]
+ "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "call\t%A1"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+ (match_operand:DI 2 "" "")))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t%P1"
+ [(set_attr "type" "callv")])
+
+(define_insn "*sibcall_value_1_rex64_v"
+ [(set (match_operand 0 "" "")
+ (call (mem:QI (reg:DI R11_REG))
+ (match_operand:DI 1 "" "")))]
+ "SIBLING_CALL_P (insn) && TARGET_64BIT"
+ "jmp\t{*%%}r11"
+ [(set_attr "type" "callv")])
+
+;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
+;; That, however, is usually mapped by the OS to SIGSEGV, which is often
+;; caught for use by garbage collectors and the like. Using an insn that
+;; maps to SIGILL makes it more likely the program will rightfully die.
+;; Keeping with tradition, "6" is in honor of #UD.
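+;; The two bytes emitted below, 0x0f 0x0b (stored little-endian as the
+;; word 0x0b0f), are the encoding of the "ud2" instruction.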
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 6))]
+ ""
+ { return ASM_SHORT "0x0b0f"; }
+ [(set_attr "length" "2")])
+
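+;; ??? An orientation note: the "jmp %A1" emitted below enters the
+;; following block of %vmovaps stores at a point computed by the
+;; prologue, so only the SSE argument registers (hard regs 21-28,
+;; i.e. xmm0-xmm7) that may actually hold varargs arguments get saved.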
+(define_expand "sse_prologue_save"
+ [(parallel [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(reg:DI 21)
+ (reg:DI 22)
+ (reg:DI 23)
+ (reg:DI 24)
+ (reg:DI 25)
+ (reg:DI 26)
+ (reg:DI 27)
+ (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (use (match_operand:DI 1 "register_operand" ""))
+ (use (match_operand:DI 2 "immediate_operand" ""))
+ (use (label_ref:DI (match_operand 3 "" "")))])]
+ "TARGET_64BIT"
+ "")
+
+(define_insn "*sse_prologue_save_insn"
+ [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+ (match_operand:DI 4 "const_int_operand" "n")))
+ (unspec:BLK [(reg:DI 21)
+ (reg:DI 22)
+ (reg:DI 23)
+ (reg:DI 24)
+ (reg:DI 25)
+ (reg:DI 26)
+ (reg:DI 27)
+ (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+ (use (match_operand:DI 1 "register_operand" "r"))
+ (use (match_operand:DI 2 "const_int_operand" "i"))
+ (use (label_ref:DI (match_operand 3 "" "X")))]
+ "TARGET_64BIT
+ && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
+ && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
+{
+ int i;
+ operands[0] = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, operands[0], operands[4]));
+ /* VEX instruction with a REX prefix will #UD. */
+ if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
+ gcc_unreachable ();
+
+ output_asm_insn ("jmp\t%A1", operands);
+ for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
+ {
+ operands[4] = adjust_address (operands[0], DImode, i*16);
+ operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
+ PUT_MODE (operands[4], TImode);
+ if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
+ output_asm_insn ("rex", operands);
+ output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
+ }
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (operands[3]));
+ return "";
+}
+ [(set_attr "type" "other")
+ (set_attr "length_immediate" "0")
+ (set_attr "length_address" "0")
+ (set (attr "length")
+ (if_then_else
+ (eq (symbol_ref "TARGET_AVX") (const_int 0))
+ (const_string "34")
+ (const_string "42")))
+ (set_attr "memory" "store")
+ (set_attr "modrm" "0")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_expand "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+ int rw = INTVAL (operands[1]);
+ int locality = INTVAL (operands[2]);
+
+ gcc_assert (rw == 0 || rw == 1);
+ gcc_assert (locality >= 0 && locality <= 3);
+ gcc_assert (GET_MODE (operands[0]) == Pmode
+ || GET_MODE (operands[0]) == VOIDmode);
+
+ /* Use the 3DNow! prefetch when a write prefetch is requested, which
+ the SSE counterpart does not support, or when the SSE prefetch is
+ not available at all (K6 machines).  Otherwise use the SSE prefetch,
+ as it allows the locality to be specified.  */
+ if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+ operands[2] = GEN_INT (3);
+ else
+ operands[1] = const0_rtx;
+})
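+;; For instance, __builtin_prefetch (p, 0, 1) maps to prefetcht2 via
+;; the SSE patterns below, while __builtin_prefetch (p, 1, l) on a
+;; 3DNow!-only machine maps to prefetchw.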
+
+(define_insn "*prefetch_sse"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE && !TARGET_64BIT"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ gcc_assert (locality >= 0 && locality <= 3);
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_sse_rex"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (const_int 0)
+ (match_operand:SI 1 "const_int_operand" ""))]
+ "TARGET_PREFETCH_SSE && TARGET_64BIT"
+{
+ static const char * const patterns[4] = {
+ "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+ };
+
+ int locality = INTVAL (operands[1]);
+ gcc_assert (locality >= 0 && locality <= 3);
+
+ return patterns[locality];
+}
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 3))]
+ "TARGET_3DNOW && !TARGET_64BIT"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
+
+(define_insn "*prefetch_3dnow_rex"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (const_int 3))]
+ "TARGET_3DNOW && TARGET_64BIT"
+{
+ if (INTVAL (operands[1]) == 0)
+ return "prefetch\t%a0";
+ else
+ return "prefetchw\t%a0";
+}
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
+
+(define_expand "stack_protect_set"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")]
+ ""
+{
+#ifdef TARGET_THREAD_SSP_OFFSET
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_tls_protect_set_di (operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+ else
+ emit_insn (gen_stack_tls_protect_set_si (operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_set_di (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+#endif
+ DONE;
+})
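+;; Note that each pattern below copies the guard value through a
+;; scratch register and then clears that register, so the canary is
+;; not left behind in a register after the store.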
+
+(define_insn "stack_protect_set_si"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_protect_set_di"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_si"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_set_di"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ {
+ /* The kernel uses a different segment register for performance
+ reasons; this way a system call does not have to trash the userspace
+ segment register, which would be expensive.  */
+ if (ix86_cmodel != CM_KERNEL)
+ return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+ else
+ return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+ }
+ [(set_attr "type" "multi")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+ ix86_compare_op0 = operands[0];
+ ix86_compare_op1 = operands[1];
+ ix86_compare_emitted = flags;
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_tls_protect_test_di (flags, operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+ else
+ emit_insn (gen_stack_tls_protect_test_si (flags, operands[0],
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1]));
+#endif
+ emit_jump_insn (gen_beq (operands[2]));
+ DONE;
+})
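+;; The test patterns below xor the two guard values into a scratch
+;; register; ZF ends up set exactly when they match, which is the
+;; condition the "beq" emitted above branches on.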
+
+(define_insn "stack_protect_test_si"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ ""
+ "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_protect_test_di"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_64BIT"
+ "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_si"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand" "i")]
+ UNSPEC_SP_TLS_TEST))
+ (clobber (match_scratch:SI 3 "=r"))]
+ ""
+ "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}"
+ [(set_attr "type" "multi")])
+
+(define_insn "stack_tls_protect_test_di"
+ [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+ (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "const_int_operand" "i")]
+ UNSPEC_SP_TLS_TEST))
+ (clobber (match_scratch:DI 3 "=r"))]
+ "TARGET_64BIT"
+ {
+ /* The kernel uses a different segment register for performance
+ reasons; this way a system call does not have to trash the userspace
+ segment register, which would be expensive.  */
+ if (ix86_cmodel != CM_KERNEL)
+ return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}";
+ else
+ return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}";
+ }
+ [(set_attr "type" "multi")])
+
+(define_mode_iterator CRC32MODE [QI HI SI])
+(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")])
+(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")])
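+;; For example, the iterator expands sse4_2_crc32<mode> into QI, HI and
+;; SI variants emitting crc32b, crc32w and crc32l, with the operand 2
+;; constraints "qm", "rm" and "rm" respectively.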
+
+(define_insn "sse4_2_crc32<mode>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SI 1 "register_operand" "0")
+ (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")]
+ UNSPEC_CRC32))]
+ "TARGET_SSE4_2"
+ "crc32<crc32modesuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse4_2_crc32di"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI
+ [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "rm")]
+ UNSPEC_CRC32))]
+ "TARGET_SSE4_2 && TARGET_64BIT"
+ "crc32q\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(include "mmx.md")
+(include "sse.md")
+(include "sync.md")
diff --git a/gcc-4.4.0/gcc/config/i386/i386.opt b/gcc-4.4.0/gcc/config/i386/i386.opt
new file mode 100644
index 000000000..05727be7b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386.opt
@@ -0,0 +1,352 @@
+; Options for the IA-32 and AMD64 ports of the compiler.
+
+; Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;; Definitions to add to the cl_target_option structure
+;; -march= processor
+TargetSave
+unsigned char arch
+
+;; -mtune= processor
+TargetSave
+unsigned char tune
+
+;; -mfpmath=
+TargetSave
+unsigned char fpmath
+
+;; CPU schedule model
+TargetSave
+unsigned char schedule
+
+;; branch cost
+TargetSave
+unsigned char branch_cost
+
+;; which ISA flags were passed by the user
+TargetSave
+int ix86_isa_flags_explicit
+
+;; which target flags were passed by the user
+TargetSave
+int target_flags_explicit
+
+;; whether -mtune was defaulted (i.e. not specified by the user)
+TargetSave
+unsigned char tune_defaulted
+
+;; whether -march was specified
+TargetSave
+unsigned char arch_specified
+
+;; x86 options
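+;; Each record below is an option name, a line of option properties and
+;; a line of help text.  A hypothetical record would look like:
+;;
+;;   mexample
+;;   Target Report Mask(EXAMPLE) Save
+;;   Describe what -mexample does
+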
+m128bit-long-double
+Target RejectNegative Report Mask(128BIT_LONG_DOUBLE) Save
+sizeof(long double) is 16
+
+m80387
+Target Report Mask(80387) Save
+Use hardware fp
+
+m96bit-long-double
+Target RejectNegative Report InverseMask(128BIT_LONG_DOUBLE) Save
+sizeof(long double) is 12
+
+maccumulate-outgoing-args
+Target Report Mask(ACCUMULATE_OUTGOING_ARGS) Save
+Reserve space for outgoing arguments in the function prologue
+
+malign-double
+Target Report Mask(ALIGN_DOUBLE) Save
+Align some doubles on dword boundary
+
+malign-functions=
+Target RejectNegative Joined Var(ix86_align_funcs_string)
+Function starts are aligned to this power of 2
+
+malign-jumps=
+Target RejectNegative Joined Var(ix86_align_jumps_string)
+Jump targets are aligned to this power of 2
+
+malign-loops=
+Target RejectNegative Joined Var(ix86_align_loops_string)
+Loop code aligned to this power of 2
+
+malign-stringops
+Target RejectNegative Report InverseMask(NO_ALIGN_STRINGOPS, ALIGN_STRINGOPS) Save
+Align destination of the string operations
+
+march=
+Target RejectNegative Joined Var(ix86_arch_string)
+Generate code for given CPU
+
+masm=
+Target RejectNegative Joined Var(ix86_asm_string)
+Use given assembler dialect
+
+mbranch-cost=
+Target RejectNegative Joined Var(ix86_branch_cost_string)
+Branches are this expensive (1-5, arbitrary units)
+
+mlarge-data-threshold=
+Target RejectNegative Joined Var(ix86_section_threshold_string)
+Data greater than given threshold will go into .ldata section in x86-64 medium model
+
+mcmodel=
+Target RejectNegative Joined Var(ix86_cmodel_string)
+Use given x86-64 code model
+
+mfancy-math-387
+Target RejectNegative Report InverseMask(NO_FANCY_MATH_387, USE_FANCY_MATH_387) Save
+Generate sin, cos, sqrt for FPU
+
+mforce-drap
+Target Report Var(ix86_force_drap)
+Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack
+
+mfp-ret-in-387
+Target Report Mask(FLOAT_RETURNS) Save
+Return values of functions in FPU registers
+
+mfpmath=
+Target RejectNegative Joined Var(ix86_fpmath_string)
+Generate floating point mathematics using given instruction set
+
+mhard-float
+Target RejectNegative Mask(80387) MaskExists Save
+Use hardware fp
+
+mieee-fp
+Target Report Mask(IEEE_FP) Save
+Use IEEE math for fp comparisons
+
+minline-compares
+Target Report Mask(INLINE_COMPARES) Save
+Inline compare operations strcmp and memcmp
+
+minline-all-stringops
+Target Report Mask(INLINE_ALL_STRINGOPS) Save
+Inline all known string operations
+
+minline-stringops-dynamically
+Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY) Save
+Inline memset/memcpy string operations, but perform inline version only for small blocks
+
+mintel-syntax
+Target Undocumented
+;; Deprecated
+
+mms-bitfields
+Target Report Mask(MS_BITFIELD_LAYOUT) Save
+Use native (MS) bitfield layout
+
+mno-align-stringops
+Target RejectNegative Report Mask(NO_ALIGN_STRINGOPS) Undocumented Save
+
+mno-fancy-math-387
+Target RejectNegative Report Mask(NO_FANCY_MATH_387) Undocumented Save
+
+mno-push-args
+Target RejectNegative Report Mask(NO_PUSH_ARGS) Undocumented Save
+
+mno-red-zone
+Target RejectNegative Report Mask(NO_RED_ZONE) Undocumented Save
+
+momit-leaf-frame-pointer
+Target Report Mask(OMIT_LEAF_FRAME_POINTER) Save
+Omit the frame pointer in leaf functions
+
+mpc
+Target RejectNegative Report Joined Var(ix87_precision_string)
+Set 80387 floating-point precision (-mpc32, -mpc64, -mpc80)
+
+mpreferred-stack-boundary=
+Target RejectNegative Joined Var(ix86_preferred_stack_boundary_string)
+Attempt to keep stack aligned to this power of 2
+
+mincoming-stack-boundary=
+Target RejectNegative Joined Var(ix86_incoming_stack_boundary_string)
+Assume incoming stack aligned to this power of 2
+
+mpush-args
+Target Report InverseMask(NO_PUSH_ARGS, PUSH_ARGS) Save
+Use push instructions to save outgoing arguments
+
+mred-zone
+Target RejectNegative Report InverseMask(NO_RED_ZONE, RED_ZONE) Save
+Use red-zone in the x86-64 code
+
+mregparm=
+Target RejectNegative Joined Var(ix86_regparm_string)
+Number of registers used to pass integer arguments
+
+mrtd
+Target Report Mask(RTD) Save
+Alternate calling convention
+
+msoft-float
+Target InverseMask(80387) Save
+Do not use hardware fp
+
+msseregparm
+Target RejectNegative Mask(SSEREGPARM) Save
+Use SSE register passing conventions for SF and DF mode
+
+mstackrealign
+Target Report Var(ix86_force_align_arg_pointer) Init(-1)
+Realign stack in prologue
+
+mstack-arg-probe
+Target Report Mask(STACK_PROBE) Save
+Enable stack probing
+
+mstringop-strategy=
+Target RejectNegative Joined Var(ix86_stringop_string)
+Choose the strategy used to generate string operations
+
+mtls-dialect=
+Target RejectNegative Joined Var(ix86_tls_dialect_string)
+Use given thread-local storage dialect
+
+mtls-direct-seg-refs
+Target Report Mask(TLS_DIRECT_SEG_REFS)
+Use direct references against %gs when accessing tls data
+
+mtune=
+Target RejectNegative Joined Var(ix86_tune_string)
+Schedule code for given CPU
+
+mveclibabi=
+Target RejectNegative Joined Var(ix86_veclibabi_string)
+Vector library ABI to use
+
+mrecip
+Target Report Mask(RECIP) Save
+Generate reciprocals instead of divss and sqrtss.
+
+mcld
+Target Report Mask(CLD) Save
+Generate cld instruction in the function prologue.
+
+mno-fused-madd
+Target RejectNegative Report Mask(NO_FUSED_MADD) Undocumented Save
+
+mfused-madd
+Target Report InverseMask(NO_FUSED_MADD, FUSED_MADD) Save
+Enable automatic generation of fused floating point multiply-add instructions
+if the ISA supports such instructions. The -mfused-madd option is on by
+default.
+
+;; ISA support
+
+m32
+Target RejectNegative Negative(m64) Report InverseMask(ISA_64BIT) Var(ix86_isa_flags) VarExists Save
+Generate 32bit i386 code
+
+m64
+Target RejectNegative Negative(m32) Report Mask(ISA_64BIT) Var(ix86_isa_flags) VarExists Save
+Generate 64bit x86-64 code
+
+mmmx
+Target Report Mask(ISA_MMX) Var(ix86_isa_flags) VarExists Save
+Support MMX built-in functions
+
+m3dnow
+Target Report Mask(ISA_3DNOW) Var(ix86_isa_flags) VarExists Save
+Support 3DNow! built-in functions
+
+m3dnowa
+Target Undocumented Mask(ISA_3DNOW_A) Var(ix86_isa_flags) VarExists Save
+Support Athlon 3Dnow! built-in functions
+
+msse
+Target Report Mask(ISA_SSE) Var(ix86_isa_flags) VarExists Save
+Support MMX and SSE built-in functions and code generation
+
+msse2
+Target Report Mask(ISA_SSE2) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE and SSE2 built-in functions and code generation
+
+msse3
+Target Report Mask(ISA_SSE3) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation
+
+mssse3
+Target Report Mask(ISA_SSSE3) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
+
+msse4.1
+Target Report Mask(ISA_SSE4_1) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation
+
+msse4.2
+Target Report Mask(ISA_SSE4_2) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
+
+msse4
+Target RejectNegative Report Mask(ISA_SSE4_2) MaskExists Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation
+
+mno-sse4
+Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists Save
+Do not support SSE4.1 and SSE4.2 built-in functions and code generation
+
+mavx
+Target Report Mask(ISA_AVX) Var(ix86_isa_flags) VarExists
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation
+
+mfma
+Target Report Mask(ISA_FMA) Var(ix86_isa_flags) VarExists
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
+
+msse4a
+Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Save
+Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
+
+msse5
+Target Report Mask(ISA_SSE5) Var(ix86_isa_flags) VarExists Save
+Support SSE5 built-in functions and code generation
+
+mabm
+Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save
+Support code generation of Advanced Bit Manipulation (ABM) instructions.
+
+mpopcnt
+Target Report Mask(ISA_POPCNT) Var(ix86_isa_flags) VarExists Save
+Support code generation of popcnt instruction.
+
+mcx16
+Target Report Mask(ISA_CX16) Var(ix86_isa_flags) VarExists Save
+Support code generation of cmpxchg16b instruction.
+
+msahf
+Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save
+Support code generation of sahf instruction in 64bit x86-64 code.
+
+maes
+Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save
+Support AES built-in functions and code generation
+
+mpclmul
+Target Report Mask(ISA_PCLMUL) Var(ix86_isa_flags) VarExists Save
+Support PCLMUL built-in functions and code generation
+
+msse2avx
+Target Report Var(ix86_sse2avx)
+Encode SSE instructions with VEX prefix
diff --git a/gcc-4.4.0/gcc/config/i386/i386elf.h b/gcc-4.4.0/gcc/config/i386/i386elf.h
new file mode 100644
index 000000000..ba53749ac
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/i386elf.h
@@ -0,0 +1,125 @@
+/* Target definitions for GCC for Intel 80386 using ELF
+ Copyright (C) 1988, 1991, 1995, 2000, 2001, 2002, 2007, 2008
+ Free Software Foundation, Inc.
+
+ Derived from sysv4.h written by Ron Guilmette (rfg@netcom.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use stabs instead of DWARF debug format. */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+
+#define TARGET_VERSION fprintf (stderr, " (i386 bare ELF target)");
+
+/* The ELF ABI for the i386 says that records and unions are returned
+ in memory. */
+
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ (TYPE_MODE (TYPE) == BLKmode \
+ || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8))
+
+#undef CPP_SPEC
+#define CPP_SPEC ""
+
+#define ENDFILE_SPEC "crtend.o%s"
+
+#define STARTFILE_SPEC "%{!shared: \
+ %{!symbolic: \
+ %{pg:gcrt0.o%s}%{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}\
+ crtbegin.o%s"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
+ do \
+ { \
+ const unsigned char *_ascii_bytes = \
+ (const unsigned char *) (STR); \
+ const unsigned char *limit = _ascii_bytes + (LENGTH); \
+ unsigned bytes_in_chunk = 0; \
+ for (; _ascii_bytes < limit; _ascii_bytes++) \
+ { \
+ const unsigned char *p; \
+ if (bytes_in_chunk >= 64) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \
+ continue; \
+ if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT) \
+ { \
+ if (bytes_in_chunk > 0) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \
+ _ascii_bytes = p; \
+ } \
+ else \
+ { \
+ if (bytes_in_chunk == 0) \
+ fprintf ((FILE), "\t.byte\t"); \
+ else \
+ fputc (',', (FILE)); \
+ fprintf ((FILE), "0x%02x", *_ascii_bytes); \
+ bytes_in_chunk += 5; \
+ } \
+ } \
+ if (bytes_in_chunk > 0) \
+ fprintf ((FILE), "\n"); \
+ } \
+ while (0)
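+
+/* As an illustration (hypothetical bytes, not from the original
+ sources): for { 'o', 'k', 0, 7 } the loop above emits the
+ NUL-terminated prefix "ok" via ASM_OUTPUT_LIMITED_STRING and then
+ "\t.byte\t0x07" for the remaining byte.  */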
+
+#define LOCAL_LABEL_PREFIX "."
+
+/* Switch into a generic section. */
+#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
+
+/* If defined, a C expression whose value is a string containing the
+ assembler operation to identify the following data as
+ uninitialized global data. If not defined, and neither
+ `ASM_OUTPUT_BSS' nor `ASM_OUTPUT_ALIGNED_BSS' are defined,
+ uninitialized global data will be output in the data section if
+ `-fno-common' is passed, otherwise `ASM_OUTPUT_COMMON' will be
+ used. */
+#undef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* Like `ASM_OUTPUT_BSS' except takes the required alignment as a
+ separate, explicit argument. If you define this macro, it is used
+ in place of `ASM_OUTPUT_BSS', and gives you more flexibility in
+ handling the required alignment of the variable. The alignment is
+ specified as the number of bits.
+
+ Try to use function `asm_output_aligned_bss' defined in file
+ `varasm.c' when defining this macro. */
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
diff --git a/gcc-4.4.0/gcc/config/i386/immintrin.h b/gcc-4.4.0/gcc/config/i386/immintrin.h
new file mode 100644
index 000000000..7a2b9b9c6
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/immintrin.h
@@ -0,0 +1,59 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#define _IMMINTRIN_H_INCLUDED
+
+#ifdef __MMX__
+#include <mmintrin.h>
+#endif
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#endif
+
+#ifdef __SSSE3__
+#include <tmmintrin.h>
+#endif
+
+#if defined (__SSE4_2__) || defined (__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#if defined (__AES__) || defined (__PCLMUL__)
+#include <wmmintrin.h>
+#endif
+
+#ifdef __AVX__
+#include <avxintrin.h>
+#endif
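+
+/* Typical use (an illustrative note): a source file includes only this
+ header and is built with, e.g., -mavx; the compiler then predefines
+ __AVX__ and the AVX intrinsics from <avxintrin.h> become visible.  */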
+
+#endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/k6.md b/gcc-4.4.0/gcc/config/i386/k6.md
new file mode 100644
index 000000000..030bc26a6
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/k6.md
@@ -0,0 +1,267 @@
+;; AMD K6/K6-2 Scheduling
+;; Copyright (C) 2002, 2004, 2007
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; The K6 architecture is quite similar to the PPro.  An important
+;; difference is that there are only two decoders, and they seem to be
+;; much slower than any of the execution units.  So we have to pay much
+;; more attention to proper scheduling for the decoders.
+;; FIXME: We don't do that right now. A good start would be to sort the
+;; instructions based on length.
+;;
+;; This description is based on data from the following documents:
+;;
+;; "AMD-K6 Processor Data Sheet (Preliminary information)"
+;; Advanced Micro Devices, Inc., 1998.
+;;
+;; "AMD-K6 Processor Code Optimization Application Note"
+;; Advanced Micro Devices, Inc., 2000.
+;;
+;; CPU execution units of the K6:
+;;
+;; store describes the Store unit. This unit is not modelled
+;; completely and it is only used to model lea operation.
+;; Otherwise it lies outside of any critical path.
+;; load describes the Load unit
+;; alux describes the Integer X unit
+;; mm describes the Multimedia unit, which shares a pipe
+;; with the Integer X unit. This unit is used for MMX,
+;; which is not implemented for K6.
+;; aluy describes the Integer Y unit
+;; fpu describes the FPU unit
+;; branch describes the Branch unit
+;;
+;; The fp unit is not pipelined, and it can only do one operation per two
+;; cycles, including fxch.
+;;
+;; Generally this is a very poor description, but at least no worse than
+;; the old description, and a lot easier to extend to something more
+;; reasonable if anyone still cares enough about this architecture in 2004.
+;;
+;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
+
+(define_automaton "k6_decoder,k6_load_unit,k6_store_unit,k6_integer_units,k6_fpu_unit,k6_branch_unit")
+
+;; The K6 instruction decoding begins before the on-chip instruction cache is
+;; filled. Depending on the length of the instruction, two simple instructions
+;; can be decoded in two parallel short decoders, or one complex instruction can
+;; be decoded in either the long or the vector decoder. For all practical
+;; purposes, the long and vector decoder can be modelled as one decoder.
+(define_cpu_unit "k6_decode_short0" "k6_decoder")
+(define_cpu_unit "k6_decode_short1" "k6_decoder")
+(define_cpu_unit "k6_decode_long" "k6_decoder")
+(exclusion_set "k6_decode_long" "k6_decode_short0,k6_decode_short1")
+(define_reservation "k6_decode_short" "k6_decode_short0|k6_decode_short1")
+(define_reservation "k6_decode_vector" "k6_decode_long")
+
+(define_cpu_unit "k6_store" "k6_store_unit")
+(define_cpu_unit "k6_load" "k6_load_unit")
+(define_cpu_unit "k6_alux,k6_aluy" "k6_integer_units")
+(define_cpu_unit "k6_fpu" "k6_fpu_unit")
+(define_cpu_unit "k6_branch" "k6_branch_unit")
+
+;; Shift instructions and certain arithmetic are issued only on Integer X.
+(define_insn_reservation "k6_alux_only" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_alux")
+
+(define_insn_reservation "k6_alux_only_load" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load,k6_alux")
+
+(define_insn_reservation "k6_alux_only_store" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_long,k6_load,k6_alux,k6_store")
+
+;; Integer divide and multiply can only be issued on Integer X, too.
+(define_insn_reservation "k6_alu_imul" 2
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "imul"))
+ "k6_decode_vector,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_load" 4
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "load")))
+ "k6_decode_vector,k6_load,k6_alux*3")
+
+(define_insn_reservation "k6_alu_imul_store" 4
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imul")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_vector,k6_load,k6_alux*3,k6_store")
+
+;; ??? Guessed latencies based on the old pipeline description.
+(define_insn_reservation "k6_alu_idiv" 17
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "none")))
+ "k6_decode_vector,k6_alux*17")
+
+(define_insn_reservation "k6_alu_idiv_mem" 19
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "idiv")
+ (eq_attr "memory" "!none")))
+ "k6_decode_vector,k6_load,k6_alux*17")
+
+;; Basic word and doubleword ALU ops can be issued on both Integer units.
+(define_insn_reservation "k6_alu" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_load" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_store" 3
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec,setcc")
+ (eq_attr "memory" "store,both,unknown")))
+ "k6_decode_long,k6_load,k6_alux|k6_aluy,k6_store")
+
+;; A "load immediate" operation does not require execution at all,
+;; it is available immediately after decoding. Special-case this.
+(define_insn_reservation "k6_alu_imov" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "memory" "none")
+ (match_operand 1 "nonimmediate_operand"))))
+ "k6_decode_short,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_alu_imov_imm" 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (and (eq_attr "memory" "none")
+ (match_operand 1 "immediate_operand"))))
+ "k6_decode_short")
+
+(define_insn_reservation "k6_alu_imov_load" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "load")))
+ "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_alu_imov_store" 1
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "store")))
+ "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_alu_imov_both" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "both,unknown")))
+ "k6_decode_long,k6_load,k6_alux|k6_aluy")
+
+;; The branch unit.
+(define_insn_reservation "k6_branch_call" 1
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "call,callv"))
+ "k6_decode_vector,k6_branch")
+
+(define_insn_reservation "k6_branch_branch" 1
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "ibr"))
+ "k6_decode_short,k6_branch")
+
+;; The load and store units have two pipeline stages.  The load latency
+;; is two cycles.
+(define_insn_reservation "k6_load_pop" 3
+ (and (eq_attr "cpu" "k6")
+ (ior (eq_attr "type" "pop")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load")
+
+(define_insn_reservation "k6_load_leave" 5
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "leave"))
+ "k6_decode_long,k6_load,(k6_alux|k6_aluy)*2")
+
+;; ??? From the old pipeline description. Egad!
+;; ??? Apparently we take care of this reservation in adjust_cost.
+(define_insn_reservation "k6_load_str" 10
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_vector,k6_load*10")
+
+;; The store unit handles lea and push. It is otherwise unmodelled.
+(define_insn_reservation "k6_store_lea" 2
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "lea"))
+ "k6_decode_short,k6_store,k6_alux|k6_aluy")
+
+(define_insn_reservation "k6_store_push" 2
+ (and (eq_attr "cpu" "k6")
+ (ior (eq_attr "type" "push")
+ (eq_attr "memory" "store,both")))
+ "k6_decode_short,k6_store")
+
+(define_insn_reservation "k6_store_str" 10
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "str"))
+ "k6_store*10")
+
+;; Most FPU instructions have latency 2 and throughput 2.
+(define_insn_reservation "k6_fpu" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "none")))
+ "k6_decode_vector,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_load" 6
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_store" 6
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fop,fmov,fcmp,fistp")
+ (eq_attr "memory" "store")))
+ "k6_decode_short,k6_store,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_fmul" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "none")))
+ "k6_decode_short,k6_fpu*2")
+
+(define_insn_reservation "k6_fpu_fmul_load" 2
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "fmul")
+ (eq_attr "memory" "load,both")))
+ "k6_decode_short,k6_load,k6_fpu*2")
+
+;; ??? Guessed latencies from the old pipeline description.
+(define_insn_reservation "k6_fpu_expensive" 56
+ (and (eq_attr "cpu" "k6")
+ (eq_attr "type" "fdiv,fpspc"))
+ "k6_decode_short,k6_fpu*56")
+
diff --git a/gcc-4.4.0/gcc/config/i386/kfreebsd-gnu.h b/gcc-4.4.0/gcc/config/i386/kfreebsd-gnu.h
new file mode 100644
index 000000000..b5fb2ba29
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/kfreebsd-gnu.h
@@ -0,0 +1,25 @@
+/* Definitions for Intel 386 running kFreeBSD-based GNU systems with ELF format
+ Copyright (C) 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LINK_EMULATION
+#define LINK_EMULATION "elf_i386_fbsd"
+#undef REG_NAME
+#define REG_NAME(reg) sc_ ## reg
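+
+/* With this definition the sc->REG_NAME(esp) accesses in
+ linux-unwind.h expand to sc->sc_esp and so on, matching the kFreeBSD
+ struct sigcontext field names.  */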
diff --git a/gcc-4.4.0/gcc/config/i386/knetbsd-gnu.h b/gcc-4.4.0/gcc/config/i386/knetbsd-gnu.h
new file mode 100644
index 000000000..54f5a6920
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/knetbsd-gnu.h
@@ -0,0 +1,23 @@
+/* Definitions for Intel 386 running kNetBSD-based GNU systems with ELF format
+ Copyright (C) 2004, 2007
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef REG_NAME
+#define REG_NAME(reg) sc_ ## reg
diff --git a/gcc-4.4.0/gcc/config/i386/kopensolaris-gnu.h b/gcc-4.4.0/gcc/config/i386/kopensolaris-gnu.h
new file mode 100644
index 000000000..3e315b83f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/kopensolaris-gnu.h
@@ -0,0 +1,22 @@
+/* Definitions for Intel 386 running kOpenSolaris-based GNU systems with ELF format
+ Copyright (C) 2009
+ Free Software Foundation, Inc.
+ Contributed by Robert Millan.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef MD_UNWIND_SUPPORT
diff --git a/gcc-4.4.0/gcc/config/i386/libgcc-glibc.ver b/gcc-4.4.0/gcc/config/i386/libgcc-glibc.ver
new file mode 100644
index 000000000..be791745b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/libgcc-glibc.ver
@@ -0,0 +1,165 @@
+# In order to work around the very problems that force us to now generally
+# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
+# By now choosing the same version tags for these specific routines, we
+# maintain enough binary compatibility to allow future versions of glibc
+# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
+
+%ifndef __x86_64__
+%exclude {
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+
+%inherit GCC_3.0 GLIBC_2.0
+GLIBC_2.0 {
+ # Sampling of DImode arithmetic used by (at least) i386 and m68k.
+ __divdi3
+ __moddi3
+ __udivdi3
+ __umoddi3
+
+ # Exception handling support functions used by most everyone.
+ __register_frame
+ __register_frame_table
+ __deregister_frame
+ __register_frame_info
+ __deregister_frame_info
+ __frame_state_for
+ __register_frame_info_table
+}
+%endif
+
+# 128-bit long double support was introduced with GCC 4.3.0 for 64-bit
+# and with GCC 4.4.0 for 32-bit.  These lines make the symbols get
+# a @@GCC_4.3.0 or @@GCC_4.4.0 version attached.
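+#
+# For example, on 32-bit the linker then records __addtf3@@GCC_4.4.0 as
+# the default version of __addtf3, while on 64-bit the default stays at
+# __addtf3@@GCC_4.3.0.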
+
+%exclude {
+ __addtf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __extendxftf2
+ __fixtfdi
+ __fixtfsi
+ __fixtfti
+ __fixunstfdi
+ __fixunstfsi
+ __fixunstfti
+ __floatditf
+ __floatsitf
+ __floattitf
+ __floatunditf
+ __floatunsitf
+ __floatuntitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+
+%ifdef __x86_64__
+# Those symbols had improper versions when they were added to gcc 4.3.0.
+# We corrected the default version to GCC_4.3.0. But we keep the old
+# version for backward binary compatibility.
+GCC_3.0 {
+ __gttf2
+ __lttf2
+ __netf2
+}
+
+GCC_4.0.0 {
+ __divtc3
+ __multc3
+ __powitf2
+}
+
+GCC_4.3.0 {
+ __addtf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __extendxftf2
+ __fixtfdi
+ __fixtfsi
+ __fixtfti
+ __fixunstfdi
+ __fixunstfsi
+ __fixunstfti
+ __floatditf
+ __floatsitf
+ __floattitf
+ __floatunditf
+ __floatunsitf
+ __floatuntitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+%else
+GCC_4.4.0 {
+ __addtf3
+ __copysigntf3
+ __divtc3
+ __divtf3
+ __eqtf2
+ __extenddftf2
+ __extendsftf2
+ __fabstf2
+ __fixtfdi
+ __fixtfsi
+ __fixunstfdi
+ __fixunstfsi
+ __floatditf
+ __floatsitf
+ __floatunditf
+ __floatunsitf
+ __getf2
+ __gttf2
+ __letf2
+ __lttf2
+ __multc3
+ __multf3
+ __negtf2
+ __netf2
+ __powitf2
+ __subtf3
+ __trunctfdf2
+ __trunctfsf2
+ __trunctfxf2
+ __unordtf2
+}
+%endif
diff --git a/gcc-4.4.0/gcc/config/i386/linux-unwind.h b/gcc-4.4.0/gcc/config/i386/linux-unwind.h
new file mode 100644
index 000000000..89f238a6e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/linux-unwind.h
@@ -0,0 +1,177 @@
+/* DWARF2 EH unwinding support for AMD x86-64 and x86.
+ Copyright (C) 2004, 2005, 2006, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Do code reading to identify a signal frame, and set the frame
+ state data appropriately. See unwind-dw2.c for the structs.
+ Don't use this at all if inhibit_libc is used. */
+
+#ifndef inhibit_libc
+
+#ifdef __x86_64__
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_64_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_64_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+
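+ /* The 8-byte constant below is the little-endian encoding of the
+ trampoline bytes c7 c0 0f 00 00 00 0f 05, i.e. movl
+ $__NR_rt_sigreturn (15), %eax followed by syscall; the 0x48 byte
+ checked first is the REX.W prefix of the mov.  */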
+ /* movq __NR_rt_sigreturn, %rax ; syscall */
+ if (*(unsigned char *)(pc+0) == 0x48
+ && *(unsigned long *)(pc+1) == 0x050f0000000fc0c7)
+ {
+ struct ucontext *uc_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &uc_->uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->rsp;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ /* Register 7 is rsp */
+ fs->regs.cfa_reg = 7;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&sc->rax - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&sc->rdx - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&sc->rcx - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&sc->rbx - new_cfa;
+ fs->regs.reg[4].how = REG_SAVED_OFFSET;
+ fs->regs.reg[4].loc.offset = (long)&sc->rsi - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&sc->rdi - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&sc->rbp - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&sc->r8 - new_cfa;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long)&sc->r9 - new_cfa;
+ fs->regs.reg[10].how = REG_SAVED_OFFSET;
+ fs->regs.reg[10].loc.offset = (long)&sc->r10 - new_cfa;
+ fs->regs.reg[11].how = REG_SAVED_OFFSET;
+ fs->regs.reg[11].loc.offset = (long)&sc->r11 - new_cfa;
+ fs->regs.reg[12].how = REG_SAVED_OFFSET;
+ fs->regs.reg[12].loc.offset = (long)&sc->r12 - new_cfa;
+ fs->regs.reg[13].how = REG_SAVED_OFFSET;
+ fs->regs.reg[13].loc.offset = (long)&sc->r13 - new_cfa;
+ fs->regs.reg[14].how = REG_SAVED_OFFSET;
+ fs->regs.reg[14].loc.offset = (long)&sc->r14 - new_cfa;
+ fs->regs.reg[15].how = REG_SAVED_OFFSET;
+ fs->regs.reg[15].loc.offset = (long)&sc->r15 - new_cfa;
+ fs->regs.reg[16].how = REG_SAVED_OFFSET;
+ fs->regs.reg[16].loc.offset = (long)&sc->rip - new_cfa;
+ fs->retaddr_column = 16;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+
+#else /* ifdef __x86_64__ */
+
+/* glibc 2.0 has no sys/ucontext.h, so signals cannot be turned into
+   exceptions there.  There's also no configure run for the target, so
+   we can't check (e.g.) HAVE_SYS_UCONTEXT_H; testing the target libc
+   version macros has to suffice. */
+#if !(__GLIBC__ == 2 && __GLIBC_MINOR__ == 0)
+
+#include <signal.h>
+#include <sys/ucontext.h>
+
+#define MD_FALLBACK_FRAME_STATE_FOR x86_fallback_frame_state
+
+static _Unwind_Reason_Code
+x86_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ unsigned char *pc = context->ra;
+ struct sigcontext *sc;
+ long new_cfa;
+
+ /* popl %eax ; movl $__NR_sigreturn,%eax ; int $0x80 */
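+  /* Illustrative note (not in the original source): the bytes are
+     58 b8 77 00 00 00 cd 80, so the first word reads 0xb858
+     little-endian, 119 (0x77) is __NR_sigreturn on i386, and 0x80cd is
+     int $0x80; the rt form below matches b8 ad 00 00 00 cd 80, where
+     173 is __NR_rt_sigreturn.  */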
+ if (*(unsigned short *)(pc+0) == 0xb858
+ && *(unsigned int *)(pc+2) == 119
+ && *(unsigned short *)(pc+6) == 0x80cd)
+ sc = context->cfa + 4;
+ /* movl $__NR_rt_sigreturn,%eax ; int $0x80 */
+ else if (*(unsigned char *)(pc+0) == 0xb8
+ && *(unsigned int *)(pc+1) == 173
+ && *(unsigned short *)(pc+5) == 0x80cd)
+ {
+ struct rt_sigframe {
+ int sig;
+ struct siginfo *pinfo;
+ void *puc;
+ struct siginfo info;
+ struct ucontext uc;
+ } *rt_ = context->cfa;
+ /* The void * cast is necessary to avoid an aliasing warning.
+ The aliasing warning is correct, but should not be a problem
+ because it does not alias anything. */
+ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
+ }
+ else
+ return _URC_END_OF_STACK;
+
+ new_cfa = sc->REG_NAME(esp);
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = 4;
+ fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+ /* The SVR4 register numbering macros aren't usable in libgcc. */
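+  /* For reference (SVR4 DWARF numbering on i386, matching the stores
+     below): 0=eax 1=ecx 2=edx 3=ebx 4=esp 5=ebp 6=esi 7=edi 8=eip.  */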
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&sc->REG_NAME(eax) - new_cfa;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&sc->REG_NAME(ebx) - new_cfa;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&sc->REG_NAME(ecx) - new_cfa;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&sc->REG_NAME(edx) - new_cfa;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&sc->REG_NAME(esi) - new_cfa;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&sc->REG_NAME(edi) - new_cfa;
+ fs->regs.reg[5].how = REG_SAVED_OFFSET;
+ fs->regs.reg[5].loc.offset = (long)&sc->REG_NAME(ebp) - new_cfa;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&sc->REG_NAME(eip) - new_cfa;
+ fs->retaddr_column = 8;
+ fs->signal_frame = 1;
+ return _URC_NO_REASON;
+}
+#endif /* not glibc 2.0 */
+#endif /* ifdef __x86_64__ */
+#endif /* ifndef inhibit_libc */
diff --git a/gcc-4.4.0/gcc/config/i386/linux.h b/gcc-4.4.0/gcc/config/i386/linux.h
new file mode 100644
index 000000000..9809c5988
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/linux.h
@@ -0,0 +1,217 @@
+/* Definitions for Intel 386 running Linux-based GNU systems with ELF format.
+ Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002, 2004, 2005,
+ 2006, 2007, 2008 Free Software Foundation, Inc.
+ Contributed by Eric Youngdale.
+ Modified for stabs-in-ELF by H.J. Lu.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Output at beginning of assembler file. */
+/* The .file command should always begin the output. */
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#define TARGET_VERSION fprintf (stderr, " (i386 Linux/ELF)");
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* We arrange for the whole %gs segment to map the tls area. */
+#undef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT MASK_TLS_DIRECT_SEG_REFS
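+
+/* Illustrative (not part of the original source): with direct %gs
+   references enabled, a local-exec TLS load can be emitted directly as
+   "movl %gs:var@ntpoff, %eax" instead of first loading the thread
+   pointer from %gs:0.  */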
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+/* Output assembler code to FILE to call the profiler.
+ To the best of my knowledge, no Linux libc has required the label
+ argument to mcount. */
+
+#define NO_PROFILE_COUNTERS 1
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "mcount"
+
+/* The GLIBC version of mcount for the x86 assumes that there is a
+ frame, so we cannot allow profiling without a frame pointer. */
+
+#undef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED crtl->profile
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) %{profile:-p}"
+
+/* Provide a LINK_SPEC appropriate for Linux.  Here we provide support
+   for the special GCC options -static and -shared, which allow us to
+   link things in one of these three modes by applying the appropriate
+   combinations of options at link-time.  We would like to support as
+   many of the other GNU linker options as possible here, but I don't
+   have the time to search for those flags, and I am not sure how to
+   add support for -soname shared_object_name.  H.J.
+
+   I took out %{v:%{!V:-V}}; it is too much.  Users can pass -Wl,-V
+   instead.
+
+   When the -shared link option is used, a final link is not being
+   done.  */
+
+/* If ELF is the default format, we should not use /lib/elf. */
+
+/* These macros may be overridden in k*bsd-gnu.h and i386/k*bsd-gnu.h. */
+#define LINK_EMULATION "elf_i386"
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
+
+#undef ASM_SPEC
+#define ASM_SPEC \
+ "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*} \
+ %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "link_emulation", LINK_EMULATION },\
+ { "dynamic_linker", LINUX_DYNAMIC_LINKER }
+
+#undef LINK_SPEC
+#define LINK_SPEC "-m %(link_emulation) %{shared:-shared} \
+ %{!shared: \
+ %{!ibcs: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{!dynamic-linker:-dynamic-linker %(dynamic_linker)}} \
+ %{static:-static}}}"
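+
+/* Illustrative expansion (not part of the original source): a plain
+   dynamic link contributes "-m elf_i386 -dynamic-linker
+   /lib/ld-linux.so.2" (plus -export-dynamic under -rdynamic), while
+   "gcc -shared" contributes "-m elf_i386 -shared" and "gcc -static"
+   contributes "-m elf_i386 -static".  */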
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mpc32:crtprec32.o%s} \
+ %{mpc64:crtprec64.o%s} \
+ %{mpc80:crtprec80.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ do { \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else { \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ /* Make sure that we have at least 8 byte alignment if > 8 byte \
+ alignment is preferred. */ \
+ if ((LOG) > 3 && (1 << (LOG)) > ((MAX_SKIP) + 1)) \
+ fprintf ((FILE), "\t.p2align 3\n"); \
+ } \
+ } \
+ } while (0)
+#endif
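+
+/* Example (illustrative, not part of the original source):
+   ASM_OUTPUT_MAX_SKIP_ALIGN (file, 4, 7) emits ".p2align 4,,7" (align
+   to 16 bytes, but only when at most 7 padding bytes are needed)
+   followed by ".p2align 3", so at least 8-byte alignment is still
+   guaranteed.  */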
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_datarel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs (((ENCODING) & DW_EH_PE_indirect ? "@GOT" : "@GOTOFF"), FILE); \
+ goto DONE; \
+ } \
+ } while (0)
+
+/* Used by crtstuff.c to initialize the base of data-relative relocations.
+ These are GOT relative on x86, so return the pic register. */
+#ifdef __PIC__
+#define CRT_GET_RFIB_DATA(BASE) \
+ { \
+ register void *ebx_ __asm__("ebx"); \
+ BASE = ebx_; \
+ }
+#else
+#define CRT_GET_RFIB_DATA(BASE) \
+ __asm__ ("call\t.LPR%=\n" \
+ ".LPR%=:\n\t" \
+ "pop{l}\t%0\n\t" \
+ /* Due to a GAS bug, this cannot use EAX. That encodes \
+ smaller than the traditional EBX, which results in the \
+ offset being off by one. */ \
+ "add{l}\t{$_GLOBAL_OFFSET_TABLE_+[.-.LPR%=],%0" \
+ "|%0,_GLOBAL_OFFSET_TABLE_+(.-.LPR%=)}" \
+ : "=d"(BASE))
+#endif
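+
+/* Illustrative expansion (not part of the original source): in AT&T
+   syntax the asm above emits roughly
+       call  .LPR0
+   .LPR0:
+       popl  %edx
+       addl  $_GLOBAL_OFFSET_TABLE_+(.-.LPR0), %edx
+   leaving the GOT address in BASE, which "=d" pins to %edx.  */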
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#undef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 1
+
+#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+
+/* This macro may be overridden in i386/k*bsd-gnu.h. */
+#define REG_NAME(reg) reg
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* i386 glibc provides __stack_chk_guard in %gs:0x14. */
+#define TARGET_THREAD_SSP_OFFSET 0x14
+#endif
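+
+/* Illustrative (not part of the original source): with this offset the
+   stack-protector prologue loads the canary as "movl %gs:20, %eax"
+   rather than going through the __stack_chk_guard symbol.  */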
diff --git a/gcc-4.4.0/gcc/config/i386/linux64.h b/gcc-4.4.0/gcc/config/i386/linux64.h
new file mode 100644
index 000000000..379e2c7d8
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/linux64.h
@@ -0,0 +1,123 @@
+/* Definitions for AMD x86-64 running Linux-based GNU systems with ELF format.
+ Copyright (C) 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+ Contributed by Jan Hubicka <jh@suse.cz>, based on linux.h.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#if TARGET_64BIT_DEFAULT
+#define TARGET_VERSION fprintf (stderr, " (x86-64 Linux/ELF)");
+#else
+#define TARGET_VERSION fprintf (stderr, " (i386 Linux/ELF)");
+#endif
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ LINUX_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+   in memory.  In 64-bit compilations we turn this flag off in
+   override_options, as we never use the pcc_struct_return scheme on
+   this target. */
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* We arrange for the whole %fs segment to map the tls area. */
+#undef TARGET_TLS_DIRECT_SEG_REFS_DEFAULT
+#define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT MASK_TLS_DIRECT_SEG_REFS
+
+/* Provide a LINK_SPEC. Here we provide support for the special GCC
+ options -static and -shared, which allow us to link things in one
+ of these three modes by applying the appropriate combinations of
+ options at link-time.
+
+ When the -shared link option is used a final link is not being
+ done. */
+
+#ifndef RUNTIME_ROOT_PREFIX
+#define RUNTIME_ROOT_PREFIX ""
+#endif
+#define GLIBC_DYNAMIC_LINKER32 RUNTIME_ROOT_PREFIX "/lib/ld-linux.so.2"
+#define GLIBC_DYNAMIC_LINKER64 RUNTIME_ROOT_PREFIX "/lib64/ld-linux-x86-64.so.2"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} \
+ %{Wa,*:%*} %{m32:--32} %{m64:--64} \
+ %{!mno-sse2avx:%{mavx:-msse2avx}} %{msse2avx:%{!mavx:-msse2avx}}"
+
+#if TARGET_64BIT_DEFAULT
+#define SPEC_32 "m32"
+#define SPEC_64 "!m32"
+#else
+#define SPEC_32 "!m64"
+#define SPEC_64 "m64"
+#endif
+
+#undef LINK_SPEC
+#define LINK_SPEC "%{" SPEC_64 ":-m elf_x86_64} %{" SPEC_32 ":-m elf_i386} \
+ %{shared:-shared} \
+ %{!shared: \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ %{" SPEC_32 ":%{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER32 "}} \
+ %{" SPEC_64 ":%{!dynamic-linker:-dynamic-linker " LINUX_DYNAMIC_LINKER64 "}}} \
+ %{static:-static}}"
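+
+/* Illustrative expansion (not part of the original source): a 64-bit
+   dynamic link contributes
+     -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2
+   while -m32 selects
+     -m elf_i386 -dynamic-linker /lib/ld-linux.so.2  */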
+
+/* Similar to standard Linux, but adding -ffast-math support. */
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ %{mpc32:crtprec32.o%s} \
+ %{mpc64:crtprec64.o%s} \
+ %{mpc80:crtprec80.o%s} \
+ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s"
+
+#if TARGET_64BIT_DEFAULT
+#define MULTILIB_DEFAULTS { "m64" }
+#else
+#define MULTILIB_DEFAULTS { "m32" }
+#endif
+
+/* Put all *tf routines in libgcc. */
+#undef LIBGCC2_HAS_TF_MODE
+#define LIBGCC2_HAS_TF_MODE 1
+#define LIBGCC2_TF_CEXT q
+#define TF_SIZE 113
+
+#undef NEED_INDICATE_EXEC_STACK
+#define NEED_INDICATE_EXEC_STACK 1
+
+#define MD_UNWIND_SUPPORT "config/i386/linux-unwind.h"
+
+/* This macro may be overridden in i386/k*bsd-gnu.h. */
+#define REG_NAME(reg) reg
+
+#ifdef TARGET_LIBC_PROVIDES_SSP
+/* i386 glibc provides __stack_chk_guard in %gs:0x14,
+ x86_64 glibc provides it in %fs:0x28. */
+#define TARGET_THREAD_SSP_OFFSET (TARGET_64BIT ? 0x28 : 0x14)
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/lynx.h b/gcc-4.4.0/gcc/config/i386/lynx.h
new file mode 100644
index 000000000..cd8386231
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/lynx.h
@@ -0,0 +1,90 @@
+/* Definitions for LynxOS on i386.
+ Copyright (C) 1993, 1995, 1996, 2002, 2004, 2005, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_VERSION fputs (" (i386/LynxOS)", stderr);
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+ builtin_define ("__x86__"); \
+ } \
+ while (0)
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* BSS_SECTION_ASM_OP gets defined in i386/unix.h. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* LynxOS's GDB counts the floating point registers from 16. */
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] \
+ : (n) == 0 ? 0 \
+ : (n) == 1 ? 2 \
+ : (n) == 2 ? 1 \
+ : (n) == 3 ? 3 \
+ : (n) == 4 ? 6 \
+ : (n) == 5 ? 7 \
+ : (n) == 6 ? 5 \
+ : (n) == 7 ? 4 \
+ : ((n) >= FIRST_STACK_REG && (n) <= LAST_STACK_REG) ? (n) + 8 \
+ : (-1))
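+
+/* Illustrative (not part of the original source): the integer mapping
+   above is the usual SVR4 one (eax=0, ecx=1, edx=2, ebx=3, esp=4,
+   ebp=5, esi=6, edi=7) expressed in GCC's hard-register order, while
+   the x87 stack registers (GCC regs 8-15) shift up to 16-23 for
+   LynxOS's GDB.  */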
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ do { \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ } \
+ } while (0)
+#endif
+
+/* Undefine SUBTARGET_EXTRA_SPECS; it is empty anyway.  We define it in
+   config/lynx.h. */
+
+#undef SUBTARGET_EXTRA_SPECS
+
+/* Undefine the definition from att.h to enable our default. */
+
+#undef ASM_OUTPUT_ALIGN
+
+/* Undefine the definition from elfos.h to enable our default. */
+
+#undef PREFERRED_DEBUGGING_TYPE
+
+/* The file i386.c defines TARGET_HAVE_TLS unconditionally if
+   HAVE_AS_TLS is defined.  HAVE_AS_TLS is defined when configure
+   detects gas support for TLS.  We undefine it here. */
+
+#undef HAVE_AS_TLS
diff --git a/gcc-4.4.0/gcc/config/i386/mach.h b/gcc-4.4.0/gcc/config/i386/mach.h
new file mode 100644
index 000000000..0aa3846ae
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mach.h
@@ -0,0 +1,20 @@
+/* Configuration for an i386 running Mach as the target machine. */
+
+#define TARGET_VERSION fprintf (stderr, " (80386, Mach)");
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("unix"); \
+ builtin_define_std ("MACH"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=mach"); \
+ } \
+ while (0)
+
+/* Specify extra dir to search for include files. */
+#define SYSTEM_INCLUDE_DIR "/usr/mach/include"
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
diff --git a/gcc-4.4.0/gcc/config/i386/mingw.opt b/gcc-4.4.0/gcc/config/i386/mingw.opt
new file mode 100644
index 000000000..6be904e96
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mingw.opt
@@ -0,0 +1,23 @@
+; MinGW-specific options.
+
+; Copyright (C) 2008 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+Wpedantic-ms-format
+C ObjC C++ ObjC++ Var(warn_pedantic_ms_format) Init(1) Warning
+Warn about non-ISO msvcrt scanf/printf width extensions
diff --git a/gcc-4.4.0/gcc/config/i386/mingw32.h b/gcc-4.4.0/gcc/config/i386/mingw32.h
new file mode 100644
index 000000000..f3fbe8c58
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mingw32.h
@@ -0,0 +1,221 @@
+/* Operating system specific defines to be used when targeting GCC for
+ hosting on Windows32, using GNU tools and the Windows32 API Library.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2007, 2008,
+ 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef TARGET_VERSION
+#if TARGET_64BIT_DEFAULT
+#define TARGET_VERSION fprintf (stderr, " (x86_64 MinGW)");
+#else
+#define TARGET_VERSION fprintf (stderr," (x86 MinGW)");
+#endif
+
+/* See i386/crtdll.h for an alternative definition.  _INTEGRAL_MAX_BITS
+   is for compatibility with the native compiler. */
+#define EXTRA_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__MSVCRT__"); \
+ builtin_define ("__MINGW32__"); \
+ builtin_define ("_WIN32"); \
+ builtin_define_std ("WIN32"); \
+ builtin_define_std ("WINNT"); \
+ builtin_define_with_int_value ("_INTEGRAL_MAX_BITS", \
+ TYPE_PRECISION (intmax_type_node));\
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) \
+ { \
+ builtin_define ("__MINGW64__"); \
+ builtin_define_std ("WIN64"); \
+ builtin_define_std ("_WIN64"); \
+ } \
+ } \
+ while (0)
+
+/* Override the standard choice of /usr/include as the default prefix
+ to try when searching for header files. */
+#undef STANDARD_INCLUDE_DIR
+#if TARGET_64BIT_DEFAULT
+#define STANDARD_INCLUDE_DIR "/mingw/include64"
+#else
+#define STANDARD_INCLUDE_DIR "/mingw/include"
+#endif
+#undef STANDARD_INCLUDE_COMPONENT
+#define STANDARD_INCLUDE_COMPONENT "MINGW"
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{mthreads:-D_MT}"
+
+/* For Windows applications, include more libraries, but always include
+ kernel32. */
+#undef LIB_SPEC
+#define LIB_SPEC "%{pg:-lgmon} %{mwindows:-lgdi32 -lcomdlg32} \
+ -luser32 -lkernel32 -ladvapi32 -lshell32"
+
+/* Weak symbols do not get resolved if using a Windows dll import lib.
+ Make the unwind registration references strong undefs. */
+#if DWARF2_UNWIND_INFO
+#define SHARED_LIBGCC_UNDEFS_SPEC \
+ "%{shared-libgcc: -u ___register_frame_info -u ___deregister_frame_info}"
+#else
+#define SHARED_LIBGCC_UNDEFS_SPEC ""
+#endif
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "shared_libgcc_undefs", SHARED_LIBGCC_UNDEFS_SPEC }
+
+#define LINK_SPEC "%{mwindows:--subsystem windows} \
+ %{mconsole:--subsystem console} \
+ %{shared: %{mdll: %eshared and mdll are not compatible}} \
+ %{shared: --shared} %{mdll:--dll} \
+ %{static:-Bstatic} %{!static:-Bdynamic} \
+ %{shared|mdll: -e _DllMainCRTStartup@12 --enable-auto-image-base} \
+ %(shared_libgcc_undefs)"
+
+/* Include the mingw32 libraries with libgcc. */
+#ifdef ENABLE_SHARED_LIBGCC
+#define SHARED_LIBGCC_SPEC "%{shared-libgcc:-lgcc_s} %{!shared-libgcc:-lgcc_eh}"
+#else
+#define SHARED_LIBGCC_SPEC /*empty*/
+#endif
+#undef REAL_LIBGCC_SPEC
+#define REAL_LIBGCC_SPEC \
+ "%{mthreads:-lmingwthrd} -lmingw32 \
+ "SHARED_LIBGCC_SPEC" \
+ -lgcc \
+ -lmoldname -lmingwex -lmsvcrt"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \
+ %{!shared:%{!mdll:crt2%O%s}} %{pg:gcrt2%O%s} \
+ crtbegin.o%s"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \
+ crtend.o%s"
+
+/* Override startfile prefix defaults. */
+#ifndef STANDARD_STARTFILE_PREFIX_1
+#if TARGET_64BIT_DEFAULT
+#define STANDARD_STARTFILE_PREFIX_1 "/mingw/lib64/"
+#else
+#define STANDARD_STARTFILE_PREFIX_1 "/mingw/lib/"
+#endif
+#endif
+#ifndef STANDARD_STARTFILE_PREFIX_2
+#define STANDARD_STARTFILE_PREFIX_2 ""
+#endif
+
+/* Output STRING, a string representing a filename, to FILE.
+   We canonicalize it to be in Unix format (backslashes are replaced
+   with forward slashes). */
+#undef OUTPUT_QUOTED_STRING
+#define OUTPUT_QUOTED_STRING(FILE, STRING) \
+do { \
+ char c; \
+ \
+ putc ('\"', asm_file); \
+ \
+ while ((c = *string++) != 0) \
+ { \
+ if (c == '\\') \
+ c = '/'; \
+ \
+ if (ISPRINT (c)) \
+ { \
+ if (c == '\"') \
+ putc ('\\', asm_file); \
+ putc (c, asm_file); \
+ } \
+ else \
+ fprintf (asm_file, "\\%03o", (unsigned char) c); \
+ } \
+ \
+ putc ('\"', asm_file); \
+} while (0)
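+
+/* Example (illustrative, not part of the original source): the name
+   C:\src\foo.c is emitted as "C:/src/foo.c"; an embedded double quote
+   is escaped as \" and a non-printable character as three octal
+   digits, e.g. \011 for tab.  */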
+
+/* Define as short unsigned int for compatibility with the MS runtime. */
+#undef WINT_TYPE
+#define WINT_TYPE "short unsigned int"
+
+/* mingw32 uses the -mthreads option to enable thread support. */
+#undef GOMP_SELF_SPECS
+#define GOMP_SELF_SPECS "%{fopenmp: -mthreads}"
+
+/* The mingw32 atexit function is safe to use in shared libraries.  Use
+   it to register C++ static destructors. */
+#define TARGET_CXX_USE_ATEXIT_FOR_CXA_ATEXIT hook_bool_void_true
+
+/* Contains a pointer to type target_ovr_attr defining the
+   target-specific overrides of format attributes.  See c-format.h for
+   the structure definition. */
+#undef TARGET_OVERRIDES_FORMAT_ATTRIBUTES
+#define TARGET_OVERRIDES_FORMAT_ATTRIBUTES mingw_format_attribute_overrides
+
+/* Specify the count of elements in TARGET_OVERRIDES_ATTRIBUTE. */
+#undef TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT
+#define TARGET_OVERRIDES_FORMAT_ATTRIBUTES_COUNT 3
+
+/* Custom initialization for warning -Wpedantic-ms-format for c-format. */
+#undef TARGET_OVERRIDES_FORMAT_INIT
+#define TARGET_OVERRIDES_FORMAT_INIT msformat_init
+
+/* MS specific format attributes for ms_printf, ms_scanf, ms_strftime. */
+#undef TARGET_FORMAT_TYPES
+#define TARGET_FORMAT_TYPES mingw_format_attributes
+
+#undef TARGET_N_FORMAT_TYPES
+#define TARGET_N_FORMAT_TYPES 3
+
+/* Let defaults.h definition of TARGET_USE_JCR_SECTION apply. */
+#undef TARGET_USE_JCR_SECTION
+
+#undef MINGW_ENABLE_EXECUTE_STACK
+#define MINGW_ENABLE_EXECUTE_STACK \
+extern void __enable_execute_stack (void *); \
+void \
+__enable_execute_stack (void *addr) \
+{ \
+ MEMORY_BASIC_INFORMATION b; \
+ if (!VirtualQuery (addr, &b, sizeof(b))) \
+ abort (); \
+ VirtualProtect (b.BaseAddress, b.RegionSize, PAGE_EXECUTE_READWRITE, \
+ &b.Protect); \
+}
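+
+/* Illustrative (not part of the original source): given the address of
+   a nested-function trampoline written on the stack, VirtualQuery
+   locates the containing region and VirtualProtect remarks it
+   PAGE_EXECUTE_READWRITE so the trampoline can run; the previous
+   protection is stored in b.Protect.  */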
+
+#undef ENABLE_EXECUTE_STACK
+#define ENABLE_EXECUTE_STACK MINGW_ENABLE_EXECUTE_STACK
+
+#ifdef IN_LIBGCC2
+#include <windows.h>
+#endif
+
+#if !TARGET_64BIT
+#define MD_UNWIND_SUPPORT "config/i386/w32-unwind.h"
+#endif
+
+/* This matches SHLIB_SONAME and SHLIB_SOVERSION in t-cygming and
+   t-cygwin. */
+#if DWARF2_UNWIND_INFO
+#define LIBGCC_EH_EXTN "_dw2"
+#else
+#define LIBGCC_EH_EXTN "_sjlj"
+#endif
+#define LIBGCC_SONAME "libgcc_s" LIBGCC_EH_EXTN "-1.dll"
diff --git a/gcc-4.4.0/gcc/config/i386/mm3dnow.h b/gcc-4.4.0/gcc/config/i386/mm3dnow.h
new file mode 100644
index 000000000..0d0735c9a
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mm3dnow.h
@@ -0,0 +1,215 @@
+/* Copyright (C) 2004, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the mm3dnow.h (of supposedly AMD origin) included with
+ MSVC 7.1. */
+
+#ifndef _MM3DNOW_H_INCLUDED
+#define _MM3DNOW_H_INCLUDED
+
+#ifdef __3dNOW__
+
+#include <mmintrin.h>
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_femms (void)
+{
+ __builtin_ia32_femms();
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgusb (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pf2id (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfadd (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpeq (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpge (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfcmpgt (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmax (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmin (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfmul (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcp (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcpit1 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrcpit2 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrsqrt (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfrsqit1 (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfsub (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfsubr (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pi2fd (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhrw (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetch (void *__P)
+{
+ __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetchw (void *__P)
+{
+ __builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_float (float __A)
+{
+ return __extension__ (__m64)(__v2sf){ __A, 0.0f };
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_float (__m64 __A)
+{
+ union { __v2sf v; float a[2]; } __tmp;
+ __tmp.v = (__v2sf)__A;
+ return __tmp.a[0];
+}
+
+#ifdef __3dNOW_A__
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pf2iw (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfnacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pfpnacc (__m64 __A, __m64 __B)
+{
+ return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pi2fw (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pswapd (__m64 __A)
+{
+ return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
+}
+
+#endif /* __3dNOW_A__ */
+#endif /* __3dNOW__ */
+
+#endif /* _MM3DNOW_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/mmintrin-common.h b/gcc-4.4.0/gcc/config/i386/mmintrin-common.h
new file mode 100644
index 000000000..0054168cc
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mmintrin-common.h
@@ -0,0 +1,155 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Common definition of the ROUND and PTEST intrinsics that are shared
+ between SSE4.1 and SSE5. */
+
+#ifndef _MMINTRIN_COMMON_H_INCLUDED
+#define _MMINTRIN_COMMON_H_INCLUDED
+
+#if !defined(__SSE5__) && !defined(__SSE4_1__)
+# error "SSE5 or SSE4.1 instruction set not enabled"
+#else
+
+/* Rounding mode macros. */
+#define _MM_FROUND_TO_NEAREST_INT 0x00
+#define _MM_FROUND_TO_NEG_INF 0x01
+#define _MM_FROUND_TO_POS_INF 0x02
+#define _MM_FROUND_TO_ZERO 0x03
+#define _MM_FROUND_CUR_DIRECTION 0x04
+
+#define _MM_FROUND_RAISE_EXC 0x00
+#define _MM_FROUND_NO_EXC 0x08
+
+#define _MM_FROUND_NINT \
+ (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_FLOOR \
+ (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_CEIL \
+ (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_TRUNC \
+ (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_RINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
+#define _MM_FROUND_NEARBYINT \
+ (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
+
+/* Test Instruction */
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) == 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testz_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & ~__M) == 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Packed integer 128-bit bitwise comparison. Return 1 if
+ (__V & __M) != 0 && (__V & ~__M) != 0. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_testnzc_si128 (__m128i __M, __m128i __V)
+{
+ return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
+}
+
+/* Macros for packed integer 128-bit comparison intrinsics. */
+#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
+
+#define _mm_test_all_ones(V) \
+ _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
+
+#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
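+
+/* Usage sketch (illustrative, not part of the original source):
+   _mm_test_all_ones (V) compares V with _mm_cmpeq_epi32 (V, V), which
+   is always all-ones, so it returns 1 exactly when every bit of V is
+   set.  */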
+
+/* Packed/scalar double precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_pd (__m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_sd(__m128d __D, __m128d __V, const int __M)
+{
+ return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
+ (__v2df)__V,
+ __M);
+}
+#else
+#define _mm_round_pd(V, M) \
+ ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
+
+#define _mm_round_sd(D, V, M) \
+ ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D), \
+ (__v2df)(__m128d)(V), (int)(M)))
+#endif
+
+/* Packed/scalar single precision floating point rounding. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ps (__m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_round_ss (__m128 __D, __m128 __V, const int __M)
+{
+ return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
+ (__v4sf)__V,
+ __M);
+}
+#else
+#define _mm_round_ps(V, M) \
+ ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
+
+#define _mm_round_ss(D, V, M) \
+ ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(V), (int)(M)))
+#endif
+
+/* Macros for ceil/floor intrinsics. */
+#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
+#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
+
+#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
+#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
+
+#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
+#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
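+
+/* Usage sketch (illustrative, not part of the original source):
+   _mm_floor_pd (_mm_set1_pd (2.5)) yields {2.0, 2.0}.  _MM_FROUND_FLOOR
+   raises the inexact exception; pass _MM_FROUND_TO_NEG_INF
+   | _MM_FROUND_NO_EXC to _mm_round_pd to suppress it.  */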
+
+#endif /* __SSE5__/__SSE4_1__ */
+
+#endif /* _MMINTRIN_COMMON_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/mmintrin.h b/gcc-4.4.0/gcc/config/i386/mmintrin.h
new file mode 100644
index 000000000..497e22edd
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mmintrin.h
@@ -0,0 +1,921 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+#ifndef __MMX__
+# error "MMX instruction set not enabled"
+#else
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef short __v4hi __attribute__ ((__vector_size__ (8)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+typedef long long __v1di __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+/* Empty the multimedia state. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+ __builtin_ia32_emms ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_empty (void)
+{
+ _mm_empty ();
+}
+
+/* Convert I to a __m64 object.  The integer is zero-extended to 64 bits. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_si64 (int __i)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_int (int __i)
+{
+ return _mm_cvtsi32_si64 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert I to a __m64 object. */
+
+/* Intel intrinsic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_from_int64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_m64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_si64 (long long __i)
+{
+ return (__m64) __i;
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi64x (long long __i)
+{
+ return (__m64) __i;
+}
+#endif
+
+/* Convert the lower 32 bits of the __m64 object into an integer. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si32 (__m64 __i)
+{
+ return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_int (__m64 __i)
+{
+ return _mm_cvtsi64_si32 (__i);
+}
+
+#ifdef __x86_64__
+/* Convert the __m64 object to a 64bit integer. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_to_int64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtm64_si64 (__m64 __i)
+{
+ return (long long)__i;
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_si64x (__m64 __i)
+{
+ return (long long)__i;
+}
+#endif
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with signed saturation. */
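+/* Example (illustrative, not part of the original source): packing
+   {1000, -2, 3, -400} with {5, 6, 7, 8} yields the bytes
+   {127, -2, 3, -128, 5, 6, 7, 8}; out-of-range words saturate to the
+   int8_t limits.  */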
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packsswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi16 (__m1, __m2);
+}
+
+/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
+ the result, and the two 32-bit values from M2 into the upper two 16-bit
+ values of the result, all with signed saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packssdw (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pi32 (__m1, __m2);
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+ the result, and the four 16-bit values from M2 into the upper four 8-bit
+ values of the result, all with unsigned saturation. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_packuswb (__m64 __m1, __m64 __m2)
+{
+ return _mm_packs_pu16 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the high half of M1 with the four
+ 8-bit values from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the high half of M1 with the two
+ 16-bit values from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the high half of M1 with the 32-bit
+ value from the high half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckhdq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpackhi_pi32 (__m1, __m2);
+}
+
+/* Interleave the four 8-bit values from the low half of M1 with the four
+ 8-bit values from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpcklbw (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi8 (__m1, __m2);
+}
+
+/* Interleave the two 16-bit values from the low half of M1 with the two
+ 16-bit values from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpcklwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi16 (__m1, __m2);
+}
+
+/* Interleave the 32-bit value from the low half of M1 with the 32-bit
+ value from the low half of M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_punpckldq (__m64 __m1, __m64 __m2)
+{
+ return _mm_unpacklo_pi32 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddb (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddw (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi16 (__m1, __m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddd (__m64 __m1, __m64 __m2)
+{
+ return _mm_add_pi32 (__m1, __m2);
+}
+
+/* Add the 64-bit values in M1 to the 64-bit values in M2. */
+#ifdef __SSE2__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
+}
+#endif
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pi16 (__m1, __m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu8 (__m1, __m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
+ saturated arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_paddusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_adds_pu16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubb (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubw (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi16 (__m1, __m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubd (__m64 __m1, __m64 __m2)
+{
+ return _mm_sub_pi32 (__m1, __m2);
+}
+
+/* Subtract the 64-bit values in M2 from the 64-bit values in M1. */
+#ifdef __SSE2__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_si64 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
+}
+#endif
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
+ saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubsb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ signed saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubsw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pi16 (__m1, __m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
+ unsigned saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pu8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubusb (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu8 (__m1, __m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+ unsigned saturating arithmetic. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_subs_pu16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psubusw (__m64 __m1, __m64 __m2)
+{
+ return _mm_subs_pu16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+ four 32-bit intermediate results, which are then summed by pairs to
+ produce two 32-bit results. */
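+/* Example (illustrative, not part of the original source): for
+   M1 = {1, 2, 3, 4} and M2 = {5, 6, 7, 8} the result is
+   {1*5 + 2*6, 3*7 + 4*8} = {17, 53}.  */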
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaddwd (__m64 __m1, __m64 __m2)
+{
+ return _mm_madd_pi16 (__m1, __m2);
+}
+
+/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
+ M2 and produce the high 16 bits of the 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mulhi_pi16 (__m1, __m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
+ the low 16 bits of the results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmullw (__m64 __m1, __m64 __m2)
+{
+ return _mm_mullo_pi16 (__m1, __m2);
+}
+
+/* Shift four 16-bit values in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllw (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllwi (__m64 __m, int __count)
+{
+ return _mm_slli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pslld (__m64 __m, __m64 __count)
+{
+ return _mm_sll_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pslldi (__m64 __m, int __count)
+{
+ return _mm_slli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M left by COUNT. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sll_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllq (__m64 __m, __m64 __count)
+{
+ return _mm_sll_si64 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_slli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psllqi (__m64 __m, int __count)
+{
+ return _mm_slli_si64 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psraw (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrawi (__m64 __m, int __count)
+{
+ return _mm_srai_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sra_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrad (__m64 __m, __m64 __count)
+{
+ return _mm_sra_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srai_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psradi (__m64 __m, int __count)
+{
+ return _mm_srai_pi32 (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_pi16 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlw (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi16 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi16 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlwi (__m64 __m, int __count)
+{
+ return _mm_srli_pi16 (__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_pi32 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrld (__m64 __m, __m64 __count)
+{
+ return _mm_srl_pi32 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_pi32 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrldi (__m64 __m, int __count)
+{
+ return _mm_srli_pi32 (__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srl_si64 (__m64 __m, __m64 __count)
+{
+ return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlq (__m64 __m, __m64 __count)
+{
+ return _mm_srl_si64 (__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_srli_si64 (__m64 __m, int __count)
+{
+ return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psrlqi (__m64 __m, int __count)
+{
+ return _mm_srli_si64 (__m, __count);
+}
+
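+/* Illustrative note (not part of the original header): the arithmetic
+   right shifts replicate the sign bit while the logical ones insert
+   zeros, so for a 16-bit lane holding 0x8000, _mm_srai_pi16 (m, 4)
+   produces 0xf800 but _mm_srli_pi16 (m, 4) produces 0x0800.  */
+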
+/* Bit-wise AND the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pand (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pand (__m64 __m1, __m64 __m2)
+{
+ return _mm_and_si64 (__m1, __m2);
+}
+
+/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
+ 64-bit value in M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pandn (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pandn (__m64 __m1, __m64 __m2)
+{
+ return _mm_andnot_si64 (__m1, __m2);
+}
+
+/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_por (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_por (__m64 __m1, __m64 __m2)
+{
+ return _mm_or_si64 (__m1, __m2);
+}
+
+/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+ return __builtin_ia32_pxor (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pxor (__m64 __m1, __m64 __m2)
+{
+ return _mm_xor_si64 (__m1, __m2);
+}
+
+/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
+ test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi8 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtb (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi8 (__m1, __m2);
+}
+
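+/* Illustrative note (not part of the original header): the greater-than
+   compares are signed, so with 8-bit lanes 0x01 > 0x80 holds, because
+   0x80 is interpreted as -128.  A true lane is all ones (0xFF), which
+   also serves as a mask for subsequent _mm_and_si64 operations.  */
+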
+/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
+ the test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi16 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtw (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi16 (__m1, __m2);
+}
+
+/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
+ the test is true and zero if false. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpeqd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpeq_pi32 (__m1, __m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
+{
+ return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pcmpgtd (__m64 __m1, __m64 __m2)
+{
+ return _mm_cmpgt_pi32 (__m1, __m2);
+}
+
+/* Creates a 64-bit zero. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_si64 (void)
+{
+ return (__m64)0LL;
+}
+
+/* Creates a vector of two 32-bit values; I0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi32 (int __i1, int __i0)
+{
+ return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
+}
+
+/* Creates a vector of four 16-bit values; W0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
+{
+ return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
+}
+
+/* Creates a vector of eight 8-bit values; B0 is least significant. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
+ char __b3, char __b2, char __b1, char __b0)
+{
+ return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
+ __b4, __b5, __b6, __b7);
+}
+
+/* Similar, but with the arguments in reverse order. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi32 (int __i0, int __i1)
+{
+ return _mm_set_pi32 (__i1, __i0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
+{
+ return _mm_set_pi16 (__w3, __w2, __w1, __w0);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
+ char __b4, char __b5, char __b6, char __b7)
+{
+ return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+}
+
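+/* Illustrative note (not part of the original header): the _mm_set_*
+   forms take the most significant element first, while the _mm_setr_*
+   forms take the least significant first, so
+     _mm_set_pi16 (4, 3, 2, 1)  and  _mm_setr_pi16 (1, 2, 3, 4)
+   build the identical 64-bit vector.  */
+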
+/* Creates a vector of two 32-bit values, both elements containing I. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi32 (int __i)
+{
+ return _mm_set_pi32 (__i, __i);
+}
+
+/* Creates a vector of four 16-bit values, all elements containing W. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi16 (short __w)
+{
+ return _mm_set_pi16 (__w, __w, __w, __w);
+}
+
+/* Creates a vector of eight 8-bit values, all elements containing B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pi8 (char __b)
+{
+ return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
+}
+
+#endif /* __MMX__ */
+#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/mmx.md b/gcc-4.4.0/gcc/config/i386/mmx.md
new file mode 100644
index 000000000..5184b1d7f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/mmx.md
@@ -0,0 +1,1628 @@
+;; GCC machine description for MMX and 3dNOW! instructions
+;; Copyright (C) 2005, 2007, 2008
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The MMX and 3dNOW! patterns are in the same file because they use
+;; the same register file, and 3dNOW! adds a number of extensions to
+;; the base integer MMX ISA.
+
+;; Note! Except for the basic move instructions, *all* of these
+;; patterns are outside the normal optabs namespace. This is because
+;; use of these registers requires the insertion of emms or femms
+;; instructions to return to normal FPU mode.  The compiler doesn't
+;; know how to do that itself, so it is up to the user; consequently,
+;; none of these patterns should ever be used except at the direction
+;; of the user via a builtin.
+
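+;; For example (an illustrative sketch, not part of this port), user
+;; code that mixes the MMX builtins with x87 arithmetic must clear the
+;; MMX state itself before touching the x87 stack:
+;;
+;;   __m64 r = _mm_add_pi16 (a, b);  /* uses the MMX register file */
+;;   _mm_empty ();                   /* emits emms                 */
+;;   double d = x * y;               /* x87 stack is usable again  */
+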
+;; 8-byte integral modes handled by MMX (and by extension, SSE)
+(define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+
+;; All 8-byte vector modes handled by MMX
+(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
+
+;; Mix-n-match
+(define_mode_iterator MMXMODE12 [V8QI V4HI])
+(define_mode_iterator MMXMODE24 [V4HI V2SI])
+(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
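+
+;; For example (illustrative), instantiating a template such as
+;; "p<plusminus_mnemonic><mmxvecsize>" below with <MODE> = V4HI and a
+;; PLUS code yields the assembler mnemonic "paddw".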
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; All of these patterns are enabled for MMX as well as 3dNOW.
+;; This is essential for maintaining stable calling conventions.
+
+(define_expand "mov<mode>"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "*mov<mode>_internal_rex64"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=rm,r,!?y,!?y ,m ,!y,*Y2,x,x ,m,r,Yi")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "Cr ,m,C ,!?ym,!?y,*Y2,!y,C,xm,x,Yi,r"))]
+ "TARGET_64BIT && TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ %vpxor\t%0, %d0
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}
+ %vmovq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9,10,11")
+ (const_string "maybe_vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*mov<mode>_internal_avx"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=!?y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,r ,m")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "C ,!ym,!?y,*Y2,!y ,C ,*Y2m,*Y2,irm,r"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpxor\t%0, %0, %0
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+ "=!?y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,r ,m")
+ (match_operand:MMXMODEI8 1 "vector_move_operand"
+ "C ,!ym,!?y,*Y2,!y ,C ,*Y2m,*Y2,C ,*x,m ,*x,irm,r"))]
+ "TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (V2SFmode, operands);
+ DONE;
+})
+
+(define_insn "*movv2sf_internal_rex64_avx"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "Cr ,m ,C ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))]
+ "TARGET_64BIT && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vxorps\t%0, %0, %0
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}
+ vmovlps\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "7,8,9,10,11,12")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_insn "*movv2sf_internal_rex64"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=rm,r ,!?y,!?y ,m ,!y,*Y2,x,x,x,m,r,Yi")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "Cr ,m ,C ,!?ym,!y,*Y2,!y,C,x,m,x,Yi,r"))]
+ "TARGET_64BIT && TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ mov{q}\t{%1, %0|%0, %1}
+ mov{q}\t{%1, %0|%0, %1}
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_insn "*movv2sf_internal_avx"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=!?y,!?y ,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "C ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+ "TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vxorps\t%0, %0, %0
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}
+ vmovlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "5,6,7,8")
+ (const_string "vex")
+ (const_string "orig")))
+ (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+(define_insn "*movv2sf_internal"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand"
+ "=!?y,!?y ,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
+ (match_operand:V2SF 1 "vector_move_operand"
+ "C ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+ "TARGET_MMX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pxor\t%0, %0
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movdq2q\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ xorps\t%0, %0
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+ (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
+ (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
+;; %%% This multiword splitting has got to go.
+(define_split
+ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODE 1 "general_operand" ""))]
+ "!TARGET_64BIT && reload_completed
+ && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0]))
+ && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))"
+ [(const_int 0)]
+ "ix86_split_long_move (operands); DONE;")
+
+(define_expand "push<mode>1"
+ [(match_operand:MMXMODE 0 "register_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+ (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "sse_movntdi"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "movntq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxmov")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (plus:V2SF
+ (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+
+(define_insn "*mmx_addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+ "pfadd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_expand "mmx_subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (minus:V2SF (match_operand:V2SF 1 "register_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "")
+
+(define_expand "mmx_subrv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (minus:V2SF (match_operand:V2SF 2 "register_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "")
+
+(define_insn "*mmx_subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y")
+ (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
+ "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ pfsub\t{%2, %0|%0, %2}
+ pfsubr\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_expand "mmx_mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+
+(define_insn "*mmx_mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+ "pfmul\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "V2SF")])
+
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+
+(define_expand "mmx_<code>v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (V2SFmode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+})
+
+(define_insn "*mmx_<code>v2sf3_finite"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+ "pf<maxminfprefix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "*mmx_<code>v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pf<maxminfprefix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCP))]
+ "TARGET_3DNOW"
+ "pfrcp\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT1))]
+ "TARGET_3DNOW"
+ "pfrcpit1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rcpit2v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRCPIT2))]
+ "TARGET_3DNOW"
+ "pfrcpit2\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rsqrtv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQRT))]
+ "TARGET_3DNOW"
+ "pfrsqrt\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_rsqit1v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PFRSQIT1))]
+ "TARGET_3DNOW"
+ "pfrsqit1\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmx")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_haddv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW"
+ "pfacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
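+;; Worked example (illustrative): pfacc on {1.0, 2.0} and {10.0, 20.0}
+;; yields {1.0 + 2.0, 10.0 + 20.0} = {3.0, 30.0}, with the first sum in
+;; the low element.
+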
+(define_insn "mmx_hsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_concat:V2SF
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SF
+ (vec_select:SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_3DNOW_A"
+ "pfnacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_addsubv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_merge:V2SF
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym"))
+ (minus:V2SF (match_dup 1) (match_dup 2))
+ (const_int 1)))]
+ "TARGET_3DNOW_A"
+ "pfpnacc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_eqv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "")
+ (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "")
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (EQ, V2SFmode, operands);")
+
+(define_insn "*mmx_eqv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (eq:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (EQ, V2SFmode, operands)"
+ "pfcmpeq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_gtv2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpgt\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_gev2sf3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pfcmpge\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_pf2id"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pf2id\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_pf2iw"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (sign_extend:V2SI
+ (ss_truncate:V2HI
+ (fix:V2SI
+ (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pf2iw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_pi2fw"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF
+ (sign_extend:V2SI
+ (truncate:V2HI
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym")))))]
+ "TARGET_3DNOW_A"
+ "pi2fw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "mmx_floatv2si2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_3DNOW"
+ "pi2fd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_pswapdv2sf2"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "V2SF")])
+
+(define_insn "*vec_dupv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y")
+ (vec_duplicate:V2SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_MMX"
+ "punpckldq\t%0, %0"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,rm")
+ (match_operand:SF 2 "vector_move_operand" "ym,C")))]
+ "TARGET_MMX && !TARGET_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt,mmxmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "vec_setv2sf"
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative,
+;; see the comment above the inline_secondary_memory_needed function in i386.c
+(define_insn_and_split "*vec_extractv2sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x, m,y ,m,f,r")
+ (vec_select:SF
+ (match_operand:V2SF 1 "nonimmediate_operand" " xm,x,ym,y,m,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative,
+;; see the comment above the inline_secondary_memory_needed function in i386.c
+(define_insn "*vec_extractv2sf_1"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,y,x,f,r")
+ (vec_select:SF
+ (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ punpckhdq\t%0, %0
+ unpckhps\t%0, %0
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "mmxcvt,sselog1,mmxmov,ssemov,fmov,imov")
+ (set_attr "mode" "DI,V4SF,SF,SF,SF,SF")])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (vec_select:SF
+ (match_operand:V2SF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && reload_completed"
+ [(const_int 0)]
+{
+ operands[1] = adjust_address (operands[1], SFmode, 4);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_extractv2sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V2SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2sf"
+ [(match_operand:V2SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "")
+ (plusminus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+ (plusminus:MMXMODEI8
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_MMX || (TARGET_SSE2 && <MODE>mode == V1DImode))
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "")
+ (sat_plusminus:MMXMODE12
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*mmx_<plusminus_insn><mode>3"
+ [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+ (sat_plusminus:MMXMODE12
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_mulv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 16))))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 16))))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pmaddwd"
+ [(set (match_operand:V2SI 0 "register_operand" "")
+ (plus:V2SI
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)]))))))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_pmaddwd"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (plus:V2SI
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0) (const_int 2)]))))
+ (mult:V2SI
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 1)
+ (parallel [(const_int 1) (const_int 3)])))
+ (sign_extend:V2SI
+ (vec_select:V2HI (match_dup 2)
+ (parallel [(const_int 1) (const_int 3)]))))))]
+ "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pmulhrwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_vector:V4SI [(const_int 32768) (const_int 32768)
+ (const_int 32768) (const_int 32768)]))
+ (const_int 16))))]
+ "TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*mmx_pmulhrwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V4SI [(const_int 32768) (const_int 32768)
+ (const_int 32768) (const_int 32768)]))
+ (const_int 16))))]
+ "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
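+;; Worked example (illustrative): per 16-bit lane, pmulhrw computes
+;; (a * b + 0x8000) >> 16, so 0x7fff * 0x7fff = 0x3fff0001 rounds to
+;; (0x3fff0001 + 0x8000) >> 16 = 0x3fff.
+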
+(define_expand "sse2_umulv1siv1di3"
+ [(set (match_operand:V1DI 0 "register_operand" "")
+ (mult:V1DI
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
+
+(define_insn "*sse2_umulv1siv1di3"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (mult:V1DI
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)])))
+ (zero_extend:V1DI
+ (vec_select:V1SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxmul")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<code>v4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (smaxmin:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V4HImode, operands);")
+
+(define_insn "*mmx_<code>v4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (smaxmin:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (<CODE>, V4HImode, operands)"
+ "p<maxminiprefix>w\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<code>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "")
+ (umaxmin:V8QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "")
+ (match_operand:V8QI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V8QImode, operands);")
+
+(define_insn "*mmx_<code>v8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (umaxmin:V8QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (<CODE>, V8QImode, operands)"
+ "p<maxminiprefix>b\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashr<mode>3"
+ [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
+ (ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
+ "TARGET_MMX"
+ "psra<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_lshr<mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (lshiftrt:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
+ "TARGET_MMX"
+ "psrl<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_ashl<mode>3"
+ [(set (match_operand:MMXMODE248 0 "register_operand" "=y")
+ (ashift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "yN")))]
+ "TARGET_MMX"
+ "psll<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_eq<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
+ (eq:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*mmx_eq<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (eq:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_gt<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (gt:MMXMODEI
+ (match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcmp")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_andnot<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_<code><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
+ (plogic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_MMX"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*mmx_<code><mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (plogic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
+ "TARGET_MMX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plogicprefix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxadd")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "mmx_packsswb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (ss_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_packssdw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI
+ (match_operand:V2SI 1 "register_operand" "0"))
+ (ss_truncate:V2HI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_packuswb"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_concat:V8QI
+ (us_truncate:V4QI
+ (match_operand:V4HI 1 "register_operand" "0"))
+ (us_truncate:V4QI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))]
+ "TARGET_MMX"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_MMX"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpcklbw"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (vec_select:V8QI
+ (vec_concat:V16QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_MMX"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_MMX"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpcklwd"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (vec_concat:V8HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_MMX"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckhdq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_MMX"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_punpckldq"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_MMX"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pinsrw"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (vec_merge:V4HI
+ (vec_duplicate:V4HI
+ (match_operand:SI 2 "nonimmediate_operand" ""))
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand:SI 3 "const_0_to_3_operand" "")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[2] = gen_lowpart (HImode, operands[2]);
+ operands[3] = GEN_INT (1 << INTVAL (operands[3]));
+})
+
+(define_insn "*mmx_pinsrw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_merge:V4HI
+ (vec_duplicate:V4HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pextrw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "y")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_pshufw"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V4HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
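+
+;; For example (illustrative): a mask of 0x1b decodes, two bits per
+;; lane starting at the low end, to the selectors 3, 2, 1, 0, which
+;; reverses the order of the four 16-bit elements.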
+
+(define_insn "mmx_pshufw_1"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")])))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pswapdv2si2"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "ym")
+ (parallel [(const_int 1) (const_int 0)])))]
+ "TARGET_3DNOW_A"
+ "pswapd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*vec_dupv4hi"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_duplicate:V4HI
+ (truncate:HI
+ (match_operand:SI 1 "register_operand" "0"))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pshufw\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*vec_dupv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_duplicate:V2SI
+ (match_operand:SI 1 "register_operand" "0")))]
+ "TARGET_MMX"
+ "punpckldq\t%0, %0"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_concatv2si"
+ [(set (match_operand:V2SI 0 "register_operand" "=y,y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,rm")
+ (match_operand:SI 2 "vector_move_operand" "ym,C")))]
+ "TARGET_MMX && !TARGET_SSE"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt,mmxmov")
+ (set_attr "mode" "DI")])
+
+(define_expand "vec_setv2si"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn_and_split "*vec_extractv2si_0"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=x,m,y, m,r")
+ (vec_select:SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm,x,ym,y,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SImode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SImode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
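+
+;; After reload, extracting element 0 needs no vector instruction: the
+;; split simply retypes the source as SImode (the same hard register, or
+;; the low word of the memory operand) and emits a plain move.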
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*vec_extractv2si_1"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Y2,Y2,x,y,x,r")
+ (vec_select:SI
+ (match_operand:V2SI 1 "nonimmediate_operand" " 0,0 ,Y2,0,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ punpckhdq\t%0, %0
+ punpckhdq\t%0, %0
+ pshufd\t{$85, %1, %0|%0, %1, 85}
+ unpckhps\t%0, %0
+ #
+ #
+ #"
+ [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,mmxmov,ssemov,imov")
+ (set_attr "mode" "DI,TI,TI,V4SF,SI,SI,SI")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (vec_select:SI
+ (match_operand:V2SI 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_MMX && reload_completed"
+ [(const_int 0)]
+{
+ operands[1] = adjust_address (operands[1], SImode, 4);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
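+
+;; Element 1 of a V2SI in memory sits at byte offset 4, so the split
+;; narrows the memory reference with adjust_address and loads it directly.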
+
+(define_expand "vec_extractv2si"
+ [(match_operand:SI 0 "register_operand" "")
+ (match_operand:V2SI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2si"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4hi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv4hi"
+ [(match_operand:HI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv4hi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand:QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv8qi"
+ [(match_operand:QI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_MMX"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mmx_uavgv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "")
+ (truncate:V8QI
+ (lshiftrt:V8HI
+ (plus:V8HI
+ (plus:V8HI
+ (zero_extend:V8HI
+ (match_operand:V8QI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8HI
+ (match_operand:V8QI 2 "nonimmediate_operand" "")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE || TARGET_3DNOW"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
+
+(define_insn "*mmx_uavgv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (truncate:V8QI
+ (lshiftrt:V8HI
+ (plus:V8HI
+ (plus:V8HI
+ (zero_extend:V8HI
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8HI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "(TARGET_SSE || TARGET_3DNOW)
+ && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
+{
+ /* These two instructions have the same operation, but their encoding
+ is different. Prefer the one that is de facto standard. */
+ if (TARGET_SSE || TARGET_3DNOW_A)
+ return "pavgb\t{%2, %0|%0, %2}";
+ else
+ return "pavgusb\t{%2, %0|%0, %2}";
+}
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_uavgv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
+
+(define_insn "*mmx_uavgv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "(TARGET_SSE || TARGET_3DNOW_A)
+ && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_psadbw"
+ [(set (match_operand:V1DI 0 "register_operand" "=y")
+ (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "mmxshft")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "pmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_expand "mmx_maskmovq"
+ [(set (match_operand:V8QI 0 "memory_operand" "")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "")
+ (match_operand:V8QI 2 "register_operand" "")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "")
+
+(define_insn "*mmx_maskmovq"
+ [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\t{%2, %1|%1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "*mmx_maskmovq_rex"
+ [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+ (match_operand:V8QI 2 "register_operand" "y")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT"
+ ;; @@@ check ordering of operands in intel/nonintel syntax
+ "maskmovq\t{%2, %1|%1, %2}"
+ [(set_attr "type" "mmxcvt")
+ (set_attr "mode" "DI")])
+
+(define_insn "mmx_emms"
+ [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
+ (clobber (reg:XF ST0_REG))
+ (clobber (reg:XF ST1_REG))
+ (clobber (reg:XF ST2_REG))
+ (clobber (reg:XF ST3_REG))
+ (clobber (reg:XF ST4_REG))
+ (clobber (reg:XF ST5_REG))
+ (clobber (reg:XF ST6_REG))
+ (clobber (reg:XF ST7_REG))
+ (clobber (reg:DI MM0_REG))
+ (clobber (reg:DI MM1_REG))
+ (clobber (reg:DI MM2_REG))
+ (clobber (reg:DI MM3_REG))
+ (clobber (reg:DI MM4_REG))
+ (clobber (reg:DI MM5_REG))
+ (clobber (reg:DI MM6_REG))
+ (clobber (reg:DI MM7_REG))]
+ "TARGET_MMX"
+ "emms"
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "unknown")])
+
+(define_insn "mmx_femms"
+ [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
+ (clobber (reg:XF ST0_REG))
+ (clobber (reg:XF ST1_REG))
+ (clobber (reg:XF ST2_REG))
+ (clobber (reg:XF ST3_REG))
+ (clobber (reg:XF ST4_REG))
+ (clobber (reg:XF ST5_REG))
+ (clobber (reg:XF ST6_REG))
+ (clobber (reg:XF ST7_REG))
+ (clobber (reg:DI MM0_REG))
+ (clobber (reg:DI MM1_REG))
+ (clobber (reg:DI MM2_REG))
+ (clobber (reg:DI MM3_REG))
+ (clobber (reg:DI MM4_REG))
+ (clobber (reg:DI MM5_REG))
+ (clobber (reg:DI MM6_REG))
+ (clobber (reg:DI MM7_REG))]
+ "TARGET_3DNOW"
+ "femms"
+ [(set_attr "type" "mmx")
+ (set_attr "memory" "none")])
diff --git a/gcc-4.4.0/gcc/config/i386/msformat-c.c b/gcc-4.4.0/gcc/config/i386/msformat-c.c
new file mode 100644
index 000000000..4648c8d58
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/msformat-c.c
@@ -0,0 +1,197 @@
+/* Check calls to formatted I/O functions (-Wformat).
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "flags.h"
+#include "c-common.h"
+#include "toplev.h"
+#include "intl.h"
+#include "diagnostic.h"
+#include "langhooks.h"
+#include "c-format.h"
+#include "alloc-pool.h"
+
+/* Mingw specific format attributes ms_printf, ms_scanf, and ms_strftime. */
+
+static format_length_info ms_printf_length_specs[] =
+{
+ { "h", FMT_LEN_h, STD_C89, NULL, 0, 0 },
+ { "l", FMT_LEN_l, STD_C89, NULL, 0, 0 },
+ { "I32", FMT_LEN_l, STD_EXT, NULL, 0, 0 },
+ { "I64", FMT_LEN_ll, STD_EXT, NULL, 0, 0 },
+ { "I", FMT_LEN_L, STD_EXT, NULL, 0, 0 },
+ { NULL, 0, 0, NULL, 0, 0 }
+};
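+
+/* Under these length modifiers a directive such as "%I64d" is checked
+   against a 64-bit (long long) argument and "%I32u" against a 32-bit
+   unsigned one, mirroring the MSVCRT forms rather than C99's "ll"/"l".  */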
+
+static const format_flag_spec ms_printf_flag_specs[] =
+{
+ { ' ', 0, 0, N_("' ' flag"), N_("the ' ' printf flag"), STD_C89 },
+ { '+', 0, 0, N_("'+' flag"), N_("the '+' printf flag"), STD_C89 },
+ { '#', 0, 0, N_("'#' flag"), N_("the '#' printf flag"), STD_C89 },
+ { '0', 0, 0, N_("'0' flag"), N_("the '0' printf flag"), STD_C89 },
+ { '-', 0, 0, N_("'-' flag"), N_("the '-' printf flag"), STD_C89 },
+ { '\'', 0, 0, N_("''' flag"), N_("the ''' printf flag"), STD_EXT },
+ { 'w', 0, 0, N_("field width"), N_("field width in printf format"), STD_C89 },
+ { 'p', 0, 0, N_("precision"), N_("precision in printf format"), STD_C89 },
+ { 'L', 0, 0, N_("length modifier"), N_("length modifier in printf format"), STD_C89 },
+ { 0, 0, 0, NULL, NULL, 0 }
+};
+
+static const format_flag_pair ms_printf_flag_pairs[] =
+{
+ { ' ', '+', 1, 0 },
+ { '0', '-', 1, 0 }, { '0', 'p', 1, 'i' },
+ { 0, 0, 0, 0 }
+};
+
+static const format_flag_spec ms_scanf_flag_specs[] =
+{
+ { '*', 0, 0, N_("assignment suppression"), N_("the assignment suppression scanf feature"), STD_C89 },
+ { 'a', 0, 0, N_("'a' flag"), N_("the 'a' scanf flag"), STD_EXT },
+ { 'w', 0, 0, N_("field width"), N_("field width in scanf format"), STD_C89 },
+ { 'L', 0, 0, N_("length modifier"), N_("length modifier in scanf format"), STD_C89 },
+ { '\'', 0, 0, N_("''' flag"), N_("the ''' scanf flag"), STD_EXT },
+ { 0, 0, 0, NULL, NULL, 0 }
+};
+
+static const format_flag_pair ms_scanf_flag_pairs[] =
+{
+ { '*', 'L', 0, 0 },
+ { 0, 0, 0, 0 }
+};
+
+static const format_flag_spec ms_strftime_flag_specs[] =
+{
+ { '#', 0, 0, N_("'#' flag"), N_("the '#' strftime flag"), STD_EXT },
+ { 0, 0, 0, NULL, NULL, 0 }
+};
+
+static const format_flag_pair ms_strftime_flag_pairs[] =
+{
+ { 0, 0, 0, 0 }
+};
+
+static const format_char_info ms_print_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "di", 0, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, T99_SST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0 +'", "i", NULL },
+ { "oxX", 0, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0#", "i", NULL },
+ { "u", 0, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0'", "i", NULL },
+ { "fgG", 0, STD_C89, { T89_D, BADLEN, BADLEN, T99_D, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0 +#'", "", NULL },
+ { "eE", 0, STD_C89, { T89_D, BADLEN, BADLEN, T99_D, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp0 +#", "", NULL },
+ { "c", 0, STD_C89, { T89_I, BADLEN, T89_S, T94_WI, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-w", "", NULL },
+ { "s", 1, STD_C89, { T89_C, BADLEN, T89_S, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp", "cR", NULL },
+ { "p", 1, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-w", "c", NULL },
+ { "n", 1, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, BADLEN, BADLEN, BADLEN, T99_IM, BADLEN, BADLEN, BADLEN }, "", "W", NULL },
+ /* X/Open conversion specifiers. */
+ { "C", 0, STD_EXT, { TEX_WI, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-w", "", NULL },
+ { "S", 1, STD_EXT, { TEX_W, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "-wp", "R", NULL },
+ { NULL, 0, 0, NOLENGTHS, NULL, NULL, NULL }
+};
+
+static const format_char_info ms_scan_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "di", 1, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, T99_SST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w'", "W", NULL },
+ { "u", 1, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w'", "W", NULL },
+ { "oxX", 1, STD_C89, { T89_UI, BADLEN, T89_US, T89_UL, T9L_ULL, T99_ST, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "W", NULL },
+ { "efgEG", 1, STD_C89, { T89_F, BADLEN, BADLEN, T89_D, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w'", "W", NULL },
+ { "c", 1, STD_C89, { T89_C, BADLEN, T89_S, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "cW", NULL },
+ { "s", 1, STD_C89, { T89_C, BADLEN, T89_S, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*aw", "cW", NULL },
+ { "[", 1, STD_C89, { T89_C, BADLEN, BADLEN, T94_W, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*aw", "cW[", NULL },
+ { "p", 2, STD_C89, { T89_V, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "W", NULL },
+ { "n", 1, STD_C89, { T89_I, BADLEN, T89_S, T89_L, T9L_LL, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "", "W", NULL },
+ /* X/Open conversion specifiers. */
+ { "C", 1, STD_EXT, { TEX_W, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*w", "W", NULL },
+ { "S", 1, STD_EXT, { TEX_W, BADLEN, T89_S, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN, BADLEN }, "*aw", "W", NULL },
+ { NULL, 0, 0, NOLENGTHS, NULL, NULL, NULL }
+};
+
+static const format_char_info ms_time_char_table[] =
+{
+ /* C89 conversion specifiers. */
+ { "ABZab", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "cx", 0, STD_C89, NOLENGTHS, "#", "3", NULL },
+ { "HIMSUWdmw", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "j", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "p", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "X", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "y", 0, STD_C89, NOLENGTHS, "#", "4", NULL },
+ { "Y", 0, STD_C89, NOLENGTHS, "#", "", NULL },
+ { "%", 0, STD_C89, NOLENGTHS, "", "", NULL },
+ /* C99 conversion specifiers. */
+ { "z", 0, STD_C99, NOLENGTHS, "#", "", NULL },
+ { NULL, 0, 0, NOLENGTHS, NULL, NULL, NULL }
+};
+
+const format_kind_info mingw_format_attributes[3] =
+{
+ { "ms_printf", ms_printf_length_specs, ms_print_char_table, " +#0-'", NULL,
+ ms_printf_flag_specs, ms_printf_flag_pairs,
+ FMT_FLAG_ARG_CONVERT|FMT_FLAG_DOLLAR_MULTIPLE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_EMPTY_PREC_OK,
+ 'w', 0, 'p', 0, 'L', 0,
+ &integer_type_node, &integer_type_node
+ },
+ { "ms_scanf", ms_printf_length_specs, ms_scan_char_table, "*'", NULL,
+ ms_scanf_flag_specs, ms_scanf_flag_pairs,
+ FMT_FLAG_ARG_CONVERT|FMT_FLAG_SCANF_A_KLUDGE|FMT_FLAG_USE_DOLLAR|FMT_FLAG_ZERO_WIDTH_BAD|FMT_FLAG_DOLLAR_GAP_POINTER_OK,
+ 'w', 0, 0, '*', 'L', 0,
+ NULL, NULL
+ },
+ { "ms_strftime", NULL, ms_time_char_table, "", "#",
+ ms_strftime_flag_specs, ms_strftime_flag_pairs,
+ FMT_FLAG_FANCY_PERCENT_OK, 0, 0, 0, 0, 0, 0,
+ NULL, NULL
+ }
+};
+
+/* Default overrides for printf, scanf and strftime. */
+const target_ovr_attr mingw_format_attribute_overrides[4] =
+{
+ { "ms_printf", "printf" },
+ { "ms_scanf", "scanf" },
+ { "ms_strftime", "strftime" }
+};
+
+/* Setup for option Wpedantic-ms-format. */
+
+#ifdef TARGET_OVERRIDES_FORMAT_INIT
+
+/* Make sure TARGET_OVERRIDES_FORMAT_INIT is prototyped. */
+extern void TARGET_OVERRIDES_FORMAT_INIT (void);
+
+/* Helper. */
+#define C89_OR_EXT (warn_pedantic_ms_format ? STD_EXT : STD_C89)
+
+void
+TARGET_OVERRIDES_FORMAT_INIT (void)
+{
+ ms_printf_length_specs[2].std = C89_OR_EXT; /* I32 */
+ ms_printf_length_specs[3].std = C89_OR_EXT; /* I64 */
+ ms_printf_length_specs[4].std = C89_OR_EXT; /* I */
+}
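+
+/* So with -Wpedantic-ms-format the I32/I64/I modifiers are marked STD_EXT
+   and diagnosed as extensions; without it they pass as plain C89.  */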
+
+#undef C89_OR_EXT
+
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/netbsd-elf.h b/gcc-4.4.0/gcc/config/i386/netbsd-elf.h
new file mode 100644
index 000000000..264d290a3
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netbsd-elf.h
@@ -0,0 +1,124 @@
+/* Definitions of target machine for GCC,
+ for i386/ELF NetBSD systems.
+ Copyright (C) 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by matthew green <mrg@eterna.com.au>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } \
+ while (0)
+
+
+/* Extra specs needed for NetBSD/i386 ELF. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/i386 ELF target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC NETBSD_LINK_SPEC_ELF
+
+#define NETBSD_ENTRY_POINT "__start"
+
+
+/* Provide a CPP_SPEC appropriate for NetBSD. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(netbsd_cpp_spec)"
+
+
+/* Make gcc agree with <machine/ansi.h> */
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
+
+
+/* Output assembler code to FILE to call the profiler. */
+
+#undef NO_PROFILE_COUNTERS
+#define NO_PROFILE_COUNTERS 1
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (flag_pic) \
+ fprintf (FILE, "\tcall __mcount@PLT\n"); \
+ else \
+ fprintf (FILE, "\tcall __mcount\n"); \
+}
+
+
+#undef HAS_INIT_SECTION
+
+/* This is how we tell the assembler that two symbols have the same value. */
+
+#define ASM_OUTPUT_DEF(FILE,NAME1,NAME2) \
+ do { assemble_name(FILE, NAME1); \
+ fputs(" = ", FILE); \
+ assemble_name(FILE, NAME2); \
+ fputc('\n', FILE); } while (0)
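+
+/* For a request to equate NAME1 with NAME2 this emits the assembler line
+   "NAME1 = NAME2", the usual gas syntax for symbol aliases.  */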
+
+/* A C statement to output to the stdio stream FILE an assembler
+ command to advance the location counter to a multiple of 1<<LOG
+ bytes if it is within MAX_SKIP bytes.
+
+ This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP) \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
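+
+/* E.g. a 16-byte alignment with at most 7 bytes of padding comes out as
+   ".p2align 4,,7"; with MAX_SKIP == 0 the skip bound is omitted.  */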
+
+/* We always use gas here, so we don't worry about ECOFF assembler
+ problems. */
+#undef TARGET_GAS
+#define TARGET_GAS 1
+
+/* Default to pcc-struct-return, because this is the ELF abi and
+ we don't care about compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
+
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/i386 ELF)");
diff --git a/gcc-4.4.0/gcc/config/i386/netbsd.h b/gcc-4.4.0/gcc/config/i386/netbsd.h
new file mode 100644
index 000000000..dd7eae1a8
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netbsd.h
@@ -0,0 +1,72 @@
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_AOUT(); \
+ } \
+ while (0)
+
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/i386 a.out)");
+
+/* This goes away when the math-emulator is fixed */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387)
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC },
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(netbsd_cpp_spec)"
+
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* i386 netbsd still uses old binutils that don't insert nops by default
+ when the .align directive demands to insert extra space in the text
+ segment. */
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d,0x90\n", (LOG))
+
+/* Profiling routines, partially copied from i386/osfrose.h. */
+
+/* Redefine this to use %eax instead of %edx. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (flag_pic) \
+ { \
+ fprintf (FILE, "\tcall mcount@PLT\n"); \
+ } \
+ else \
+ { \
+ fprintf (FILE, "\tcall mcount\n"); \
+ } \
+}
+
+/* Until they use ELF or something that handles dwarf2 unwinds
+ and initialization stuff better. */
+#define DWARF2_UNWIND_INFO 0
+
+/* Redefine this so that it becomes "_GLOBAL_OFFSET_TABLE_" when the label
+ prefix is added. */
+#undef GOT_SYMBOL_NAME
+#define GOT_SYMBOL_NAME "GLOBAL_OFFSET_TABLE_"
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
diff --git a/gcc-4.4.0/gcc/config/i386/netbsd64.h b/gcc-4.4.0/gcc/config/i386/netbsd64.h
new file mode 100644
index 000000000..5add1032c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netbsd64.h
@@ -0,0 +1,72 @@
+/* Definitions of target machine for GCC,
+ for x86-64/ELF NetBSD systems.
+ Copyright (C) 2002, 2004, 2007 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ } \
+ while (0)
+
+
+/* Extra specs needed for NetBSD/x86-64 ELF. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_cpp_spec", NETBSD_CPP_SPEC }, \
+ { "netbsd_link_spec", NETBSD_LINK_SPEC_ELF }, \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/x86-64 ELF target. */
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{m32:-m elf_i386} \
+ %{m64:-m elf_x86_64} \
+ %(netbsd_link_spec)"
+
+#define NETBSD_ENTRY_POINT "_start"
+
+
+/* Provide a CPP_SPEC appropriate for NetBSD. */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%(netbsd_cpp_spec)"
+
+
+/* Output assembler code to FILE to call the profiler. */
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+{ \
+ if (TARGET_64BIT && flag_pic) \
+ fprintf (FILE, "\tcall *__mcount@PLT\n"); \
+ else if (flag_pic) \
+ fprintf (FILE, "\tcall __mcount@PLT\n"); \
+ else \
+ fprintf (FILE, "\tcall __mcount\n"); \
+}
+
+/* Attempt to enable execute permissions on the stack. */
+#define ENABLE_EXECUTE_STACK NETBSD_ENABLE_EXECUTE_STACK
+
+#define TARGET_VERSION fprintf (stderr, " (NetBSD/x86_64 ELF)");
diff --git a/gcc-4.4.0/gcc/config/i386/netware-crt0.c b/gcc-4.4.0/gcc/config/i386/netware-crt0.c
new file mode 100644
index 000000000..03141ab99
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netware-crt0.c
@@ -0,0 +1,79 @@
+/* Startup routines for NetWare.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+#include <stdint.h>
+#include "unwind-dw2-fde.h"
+
+int __init_environment (void *);
+int __deinit_environment (void *);
+
+
+#define SECTION_DECL(name, decl) decl __attribute__((__section__(name)))
+
+SECTION_DECL(".ctors", void(*const __CTOR_LIST__)(void))
+ = (void(*)(void))(intptr_t)-1;
+SECTION_DECL(".ctors$_", void(*const __CTOR_END__)(void)) = NULL;
+
+SECTION_DECL(".dtors", void(*const __DTOR_LIST__)(void))
+ = (void(*)(void))(intptr_t)-1;
+SECTION_DECL(".dtors$_", void(*const __DTOR_END__)(void)) = NULL;
+
+/* No need to use the __[de]register_frame_info_bases functions since
+ for us the bases are NULL always anyway. */
+void __register_frame_info (const void *, struct object *)
+ __attribute__((__weak__));
+void *__deregister_frame_info (const void *) __attribute__((__weak__));
+
+SECTION_DECL(".eh_frame", /*const*/ uint32_t __EH_FRAME_BEGIN__[]) = { };
+SECTION_DECL(".eh_frame$_", /*const*/ uint32_t __EH_FRAME_END__[]) = {0};
+
+int
+__init_environment (void *unused __attribute__((__unused__)))
+{
+ void (* const * pctor)(void);
+ static struct object object;
+
+ if (__register_frame_info)
+ __register_frame_info (__EH_FRAME_BEGIN__, &object);
+
+ for (pctor = &__CTOR_END__ - 1; pctor > &__CTOR_LIST__; --pctor)
+ if (*pctor != NULL)
+ (*pctor)();
+
+ return 0;
+}
+
+int
+__deinit_environment (void *unused __attribute__((__unused__)))
+{
+ /* This should be static to prevent calling the same destructor
+ twice (just in case where we get here multiple times). */
+ static void (* const * pdtor)(void) = &__DTOR_LIST__ + 1;
+
+ while (pdtor < &__DTOR_END__)
+ if (*pdtor++ != NULL)
+ pdtor[-1] ();
+
+ if (__deregister_frame_info)
+ __deregister_frame_info(__EH_FRAME_BEGIN__);
+
+ return 0;
+}
diff --git a/gcc-4.4.0/gcc/config/i386/netware-libgcc.c b/gcc-4.4.0/gcc/config/i386/netware-libgcc.c
new file mode 100644
index 000000000..0925d872a
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netware-libgcc.c
@@ -0,0 +1,58 @@
+/* Startup code for libgcc_s.nlm, necessary because we can't allow
+ libgcc_s to use libc's malloc & Co., which associate allocations
+ with the NLM owning the current (application) thread.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include <netware.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <windows.h>
+
+static rtag_t allocRTag;
+
+BOOL
+DllMain (HINSTANCE libraryId __attribute__ ((__unused__)),
+ DWORD reason, void *hModule)
+{
+ switch (reason)
+ {
+ case DLL_NLM_STARTUP:
+ allocRTag = AllocateResourceTag (hModule,
+ "libgcc memory", AllocSignature);
+ return allocRTag != NULL;
+ case DLL_NLM_SHUTDOWN:
+ /* This does not recover resources associated with the tag...
+ ReturnResourceTag (allocRTag, 0); */
+ break;
+ }
+ return 1;
+}
+
+void *
+malloc (size_t size)
+{
+ return AllocSleepOK (size, allocRTag, NULL);
+}
+
+void
+free (void *ptr)
+{
+ Free (ptr);
+}
diff --git a/gcc-4.4.0/gcc/config/i386/netware-libgcc.def b/gcc-4.4.0/gcc/config/i386/netware-libgcc.def
new file mode 100644
index 000000000..a545631b1
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netware-libgcc.def
@@ -0,0 +1,2 @@
+description "gcc runtime and intrinsics support"
+copyright "Copyright (C) 1989-2005 Free Software Foundation, Inc."
diff --git a/gcc-4.4.0/gcc/config/i386/netware-libgcc.exp b/gcc-4.4.0/gcc/config/i386/netware-libgcc.exp
new file mode 100644
index 000000000..309cf7549
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netware-libgcc.exp
@@ -0,0 +1,83 @@
+# libgcc_s.nlm exports
+ (libgcc2),
+ __absvdi2,
+ __absvsi2,
+ __addvdi3,
+ __addvsi3,
+# __ashldi3,
+# __ashrdi3,
+ __bswapdi2,
+ __bswapsi2,
+ __clzdi2,
+ __clzsi2,
+ __ctzdi2,
+ __ctzsi2,
+ __deregister_frame,
+ __deregister_frame_info,
+ __deregister_frame_info_bases,
+ __divdc3,
+# __divdi3,
+ __divsc3,
+# __divtc3,
+ __divxc3,
+ __emutls_get_address,
+ __emutls_register_common,
+ __ffsdi2,
+ __ffssi2,
+ __fixunsdfdi,
+ __fixunssfdi,
+# __fixunstfdi,
+ __fixunsxfdi,
+ __floatundisf,
+ __floatundidf,
+# __floatunditf,
+ __floatundixf,
+ __gcc_bcmp,
+ __gcc_personality_v0,
+# __lshrdi3,
+# __moddi3,
+ __muldc3,
+# __muldi3,
+ __mulsc3,
+# __multc3,
+ __mulvdi3,
+ __mulvsi3,
+ __mulxc3,
+ __negvdi2,
+ __negvsi2,
+ __paritydi2,
+ __paritysi2,
+ __popcountdi2,
+ __popcountsi2,
+ __powidf2,
+ __powisf2,
+# __powitf2,
+ __powixf2,
+ __register_frame,
+ __register_frame_info,
+ __register_frame_info_bases,
+ __register_frame_info_table,
+ __register_frame_info_table_bases,
+ __register_frame_table,
+ __subvdi3,
+ __subvsi3,
+# __umoddi3,
+# __udivdi3,
+ _Unwind_Backtrace,
+ _Unwind_DeleteException,
+ _Unwind_FindEnclosingFunction,
+ _Unwind_Find_FDE,
+ _Unwind_ForcedUnwind,
+ _Unwind_GetCFA,
+ _Unwind_GetDataRelBase,
+ _Unwind_GetGR,
+ _Unwind_GetIP,
+ _Unwind_GetIPInfo,
+ _Unwind_GetLanguageSpecificData,
+ _Unwind_GetRegionStart,
+ _Unwind_GetTextRelBase,
+ _Unwind_RaiseException,
+ _Unwind_Resume,
+ _Unwind_Resume_or_Rethrow,
+ _Unwind_SetGR,
+ _Unwind_SetIP
diff --git a/gcc-4.4.0/gcc/config/i386/netware.c b/gcc-4.4.0/gcc/config/i386/netware.c
new file mode 100644
index 000000000..7f040d4b0
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netware.c
@@ -0,0 +1,256 @@
+/* Subroutines for insn-output.c for NetWare.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "toplev.h"
+#include "ggc.h"
+
+/* Return string which is the function name, identified by ID, modified
+ with PREFIX and a suffix consisting of an atsign (@) followed by the
+ number of bytes of arguments. If ID is NULL use the DECL_NAME as base.
+ Return NULL if no change required. */
+
+static tree
+gen_stdcall_or_fastcall_decoration (tree decl, tree id, char prefix)
+{
+ unsigned HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str;
+ tree type = TREE_TYPE (decl);
+
+ if (prototype_p (type))
+ {
+ tree arg;
+ function_args_iterator args_iter;
+
+ /* This attribute is ignored for variadic functions. */
+ if (stdarg_p (type))
+ return NULL_TREE;
+
+ /* Quit if we hit an incomplete type. Error is reported
+ by convert_arguments in c-typeck.c or cp/typeck.c. */
+ FOREACH_FUNCTION_ARGS(type, arg, args_iter)
+ {
+ HOST_WIDE_INT parm_size;
+ unsigned HOST_WIDE_INT parm_boundary_bytes;
+
+ if (! COMPLETE_TYPE_P (arg))
+ break;
+
+ parm_size = int_size_in_bytes (arg);
+ if (parm_size < 0)
+ break;
+
+ parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ total += (parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes;
+ }
+ }
+
+ new_str = XALLOCAVEC (char, 1 + strlen (old_str) + 1 + 10 + 1);
+ sprintf (new_str, "%c%s@" HOST_WIDE_INT_PRINT_UNSIGNED,
+ prefix, old_str, total);
+
+ return get_identifier (new_str);
+}
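+
+/* As a hypothetical example, a prototyped "int f (int, int)" marked
+   stdcall and decorated with PREFIX '_' becomes "_f@8": each int argument
+   is rounded up to the 4-byte PARM_BOUNDARY, giving 8 bytes in total.  */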
+
+/* Return string which is the function name, identified by ID, modified
+ with an _n@ prefix (where n represents the number of arguments passed in
+ registers). If ID is NULL use the DECL_NAME as base.
+ Return NULL if no change required. */
+
+static tree
+gen_regparm_prefix (tree decl, tree id, unsigned int nregs)
+{
+ unsigned HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str;
+ tree type = TREE_TYPE (decl);
+
+ if (prototype_p (type))
+ {
+ tree arg;
+ function_args_iterator args_iter;
+
+ /* This attribute is ignored for variadic functions. */
+ if (stdarg_p (type))
+ return NULL_TREE;
+
+ /* Quit if we hit an incomplete type. Error is reported
+ by convert_arguments in c-typeck.c or cp/typeck.c. */
+ FOREACH_FUNCTION_ARGS(type, arg, args_iter)
+ {
+ HOST_WIDE_INT parm_size;
+ unsigned HOST_WIDE_INT parm_boundary_bytes;
+
+ if (! COMPLETE_TYPE_P (arg))
+ break;
+
+ parm_size = int_size_in_bytes (arg);
+ if (parm_size < 0)
+ break;
+
+ parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ total += (parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes;
+ }
+ }
+
+ if (nregs > total / UNITS_PER_WORD)
+ nregs = total / UNITS_PER_WORD;
+ gcc_assert (nregs <= 9);
+ new_str = XALLOCAVEC (char, 3 + strlen (old_str) + 1);
+ sprintf (new_str, "_%u@%s", nregs, old_str);
+
+ return get_identifier (new_str);
+}
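+
+/* For the same hypothetical "int f (int, int)" with regparm (3), only
+   total / UNITS_PER_WORD == 2 argument words exist, so nregs is clamped
+   to 2 and the decorated name becomes "_2@f".  */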
+
+/* Maybe decorate and get a new identifier for the DECL of a stdcall or
+ fastcall function. The original identifier is supplied in ID. */
+
+static tree
+i386_nlm_maybe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ tree new_id;
+
+ if (lookup_attribute ("stdcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_decoration (decl, id, '_');
+ else if (lookup_attribute ("fastcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_decoration (decl, id, FASTCALL_PREFIX);
+ else if ((new_id = lookup_attribute ("regparm", type_attributes)))
+ new_id = gen_regparm_prefix (decl, id,
+ TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (new_id))));
+ else
+ new_id = NULL_TREE;
+
+ return new_id;
+}
+
+/* This is used as a target hook to modify the DECL_ASSEMBLER_NAME
+ in the language-independent default hook
+ langhooks.c:lhd_set_decl_assembler_name ()
+ and in cp/mangle.c:mangle_decl (). */
+tree
+i386_nlm_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = TREE_CODE (decl) == FUNCTION_DECL
+ ? i386_nlm_maybe_mangle_decl_assembler_name (decl, id)
+ : NULL_TREE;
+
+ return (new_id ? new_id : id);
+}
+
+void
+i386_nlm_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (first
+ && TREE_CODE (decl) == FUNCTION_DECL
+ /* Do not change the identifier if a verbatim asmspec
+ or if stdcall suffix already added. */
+ && *IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)) != '*'
+ && !strchr (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)), '@'))
+ {
+ /* FIXME: In Ada, and perhaps other language frontends,
+ imported stdcall names may not yet have been modified.
+ Check and do it now. */
+ rtx symbol = XEXP (rtl, 0);
+ tree new_id;
+ tree old_id = DECL_ASSEMBLER_NAME (decl);
+
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+
+ if ((new_id = i386_nlm_maybe_mangle_decl_assembler_name (decl, old_id)))
+ {
+ /* These attributes must be present on first declaration,
+ change_decl_assembler_name will warn if they are added
+ later and the decl has been referenced, but duplicate_decls
+ should catch the mismatch first. */
+ change_decl_assembler_name (decl, new_id);
+ XSTR (symbol, 0) = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ }
+ }
+}
+
+/* Strip the stdcall/fastcall/regparm pre-/suffix. */
+
+const char *
+i386_nlm_strip_name_encoding (const char *str)
+{
+ const char *name = default_strip_name_encoding (str);
+
+ if (*str != '*' && (*name == '_' || *name == '@'))
+ {
+ const char *p = strchr (name + 1, '@');
+
+ if (p)
+ {
+ ++name;
+ if (ISDIGIT (p[1]))
+ name = ggc_alloc_string (name, p - name);
+ else
+ {
+ gcc_assert (ISDIGIT (*name));
+ name++;
+ gcc_assert (name == p);
+ }
+ }
+ }
+ return name;
+}
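+
+/* Thus a decorated stdcall name such as "_f@8" strips back to plain "f",
+   while verbatim names beginning with '*' are left to the default
+   stripping and otherwise untouched here.  */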
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+
+void
+netware_override_options (void)
+{
+ override_options ();
+
+ if (flag_pic)
+ {
+ error ("-fPIC and -fpic are not supported for this target");
+ flag_pic = 0;
+ }
+}
diff --git a/gcc-4.4.0/gcc/config/i386/netware.h b/gcc-4.4.0/gcc/config/i386/netware.h
new file mode 100644
index 000000000..4f1ef5588
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/netware.h
@@ -0,0 +1,168 @@
+/* Core target definitions for GCC for Intel 80x86 running NetWare
+ and using DWARF for the debugging format.
+ Copyright (C) 1993, 1994, 2004, 2007, 2008 Free Software Foundation, Inc.
+
+ Written by David V. Henkel-Wallace (gumby@cygnus.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_VERSION fprintf (stderr, " (x86 NetWare)");
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
+#undef LIB_SPEC
+#define LIB_SPEC ""
+
+/* Kinda useless, but what the hell */
+#undef LINK_SPEC
+#define LINK_SPEC "%{h*} %{V} %{v:%{!V:-V}} \
+ %{b} \
+ %{Qy:} %{!Qn:-Qy}"
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC ""
+
+#undef RELATIVE_PREFIX_NOT_LINKDIR
+#undef LIBGCC_SPEC
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("IAPX386"); \
+ builtin_define ("_M_IX86=300"); \
+ builtin_define ("__netware__"); \
+ builtin_assert ("system=netware"); \
+ builtin_define ("__ELF__"); \
+ builtin_define ("__cdecl=__attribute__((__cdecl__))"); \
+ builtin_define ("__stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("__fastcall=__attribute__((__fastcall__))"); \
+ if (!flag_iso) \
+ { \
+ builtin_define ("_cdecl=__attribute__((__cdecl__))"); \
+ builtin_define ("_stdcall=__attribute__((__stdcall__))"); \
+ builtin_define ("_fastcall=__attribute__((__fastcall__))"); \
+ } \
+ } \
+ while (0)
+
+#undef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT TARGET_CPU_DEFAULT_pentium4
+
+/* By default, target has a 80387, uses IEEE compatible arithmetic,
+ returns float values in the 387, and uses MSVC bit field layout. */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP | \
+ MASK_FLOAT_RETURNS | MASK_ALIGN_DOUBLE | MASK_MS_BITFIELD_LAYOUT)
+
+/* Sometimes certain combinations of command options do not make
+ sense on a particular target machine. You can define a macro
+ `OVERRIDE_OPTIONS' to take account of this. This macro, if
+ defined, is executed once just after all the command options have
+ been parsed.
+
+ Don't use this macro to turn on various extra optimizations for
+ `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
+#undef OVERRIDE_OPTIONS
+extern void netware_override_options (void);
+#define OVERRIDE_OPTIONS netware_override_options ()
+
+#undef MATH_LIBRARY
+#define MATH_LIBRARY ""
+
+/* Align doubles and long-longs in structures on qword boundaries. */
+#undef BIGGEST_FIELD_ALIGNMENT
+#define BIGGEST_FIELD_ALIGNMENT 64
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Implicit arguments pointing to aggregate return values are to be
+ removed by the caller. */
+#undef KEEP_AGGREGATE_RETURN_POINTER
+#define KEEP_AGGREGATE_RETURN_POINTER 1
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) (svr4_dbx_register_map[n])
+
+/* Enable parsing of #pragma pack(push,<n>) and #pragma pack(pop). */
+#define HANDLE_PRAGMA_PACK_PUSH_POP
+
+/* Default structure packing is 1-byte. */
+#define TARGET_DEFAULT_PACK_STRUCT 1
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "short unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 16
+
+#undef WINT_TYPE
+#define WINT_TYPE "int"
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_datarel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs (((ENCODING) & DW_EH_PE_indirect ? "@GOT" : "@GOTOFF"), FILE); \
+ goto DONE; \
+ } \
+ } while (0)
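+
+/* A 4-byte datarel-encoded reference to a symbol "sym" therefore prints
+   as ".long sym@GOTOFF", or as ".long sym@GOT" when the indirect bit of
+   the encoding is set.  */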
+
+/* there is no TLS support in NLMs/on NetWare */
+#undef HAVE_AS_TLS
+
+#define HAS_INIT_SECTION
+#undef INIT_SECTION_ASM_OP
+
+#define CTOR_LISTS_DEFINED_EXTERNALLY
+
+#undef READONLY_DATA_SECTION_ASM_OP
+#define READONLY_DATA_SECTION_ASM_OP ".section\t.rodata"
+
+/* Define this macro if references to a symbol must be treated
+ differently depending on something about the variable or
+ function named by the symbol (such as what section it is in).
+
+ On i386 running NetWare, modify the assembler name with an underscore (_)
+ or atsign (@) prefix and a suffix consisting of an atsign (@) followed by
+ a string of digits that represents the number of bytes of arguments passed
+ to the function, if it has the attribute STDCALL. Alternatively, if it has
+ the REGPARM attribute, prefix it with an underscore (_), a digit
+ representing the number of registers used, and an atsign (@). */
+void i386_nlm_encode_section_info (tree, rtx, int);
+extern tree i386_nlm_mangle_decl_assembler_name (tree, tree);
+const char *i386_nlm_strip_name_encoding (const char *);
+#define SUBTARGET_ENCODE_SECTION_INFO i386_nlm_encode_section_info
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME i386_nlm_mangle_decl_assembler_name
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING i386_nlm_strip_name_encoding
diff --git a/gcc-4.4.0/gcc/config/i386/nmmintrin.h b/gcc-4.4.0/gcc/config/i386/nmmintrin.h
new file mode 100644
index 000000000..2a2d264c6
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/nmmintrin.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _NMMINTRIN_H_INCLUDED
+#define _NMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4_2__
+# error "SSE4.2 instruction set not enabled"
+#else
+/* We just include SSE4.1 header file. */
+#include <smmintrin.h>
+#endif /* __SSE4_2__ */
+
+#endif /* _NMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/nto.h b/gcc-4.4.0/gcc/config/i386/nto.h
new file mode 100644
index 000000000..8963d465b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/nto.h
@@ -0,0 +1,93 @@
+/* Definitions for Intel 386 running QNX/Neutrino.
+ Copyright (C) 2002, 2003, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (QNX/Neutrino/i386 ELF)");
+
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define_std ("__X86__"); \
+ builtin_define_std ("__QNXNTO__"); \
+ builtin_define_std ("__QNX__"); \
+ builtin_define_std ("__ELF__"); \
+ builtin_define_std ("__LITTLEENDIAN__");\
+ builtin_assert ("system=qnx"); \
+ builtin_assert ("system=qnxnto"); \
+ builtin_assert ("system=nto"); \
+ builtin_assert ("system=unix"); \
+ } \
+ while (0)
+
+#undef THREAD_MODEL_SPEC
+#define THREAD_MODEL_SPEC "posix"
+
+#ifdef CROSS_DIRECTORY_STRUCTURE
+#define SYSROOT_SUFFIX_SPEC "x86"
+#endif
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+"%{!shared: \
+ %{!symbolic: \
+ %{pg:mcrt1.o%s} \
+ %{!pg:%{p:mcrt1.o%s} \
+ %{!p:crt1.o%s}}}} \
+crti.o%s \
+%{fexceptions: crtbegin.o%s} \
+%{!fexceptions: %R/lib/crtbegin.o}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "crtend.o%s crtn.o%s"
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{h*} %{v:-V} \
+ %{b} \
+ %{static:-dn -Bstatic} \
+ %{shared:-G -dy -z text} \
+ %{symbolic:-Bsymbolic -G -dy -z text} \
+ %{G:-G} \
+ %{YP,*} \
+ %{!YP,*:%{p:-Y P,%R/lib} \
+ %{!p:-Y P,%R/lib}} \
+ %{Qy:} %{!Qn:-Qy} \
+ -m i386nto \
+ %{!shared: --dynamic-linker /usr/lib/ldqnx.so.2}"
+
+
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "long unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+#define NO_IMPLICIT_EXTERN_C 1
+
diff --git a/gcc-4.4.0/gcc/config/i386/nwld.c b/gcc-4.4.0/gcc/config/i386/nwld.c
new file mode 100644
index 000000000..9ae2aad3d
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/nwld.c
@@ -0,0 +1,73 @@
+/* Subroutines for insn-output.c for NetWare.
+ Contributed by Jan Beulich (jbeulich@novell.com)
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "toplev.h"
+
+void
+nwld_named_section_asm_out_constructor (rtx symbol, int priority)
+{
+#if !SUPPORTS_INIT_PRIORITY
+ const char section[] = ".ctors"TARGET_SUB_SECTION_SEPARATOR;
+#else
+ char section[20];
+
+ sprintf (section,
+ ".ctors"TARGET_SUB_SECTION_SEPARATOR"%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; constructors are run from right to left, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+#endif
+
+ switch_to_section (get_section (section, 0, NULL));
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
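+
+/* E.g. a constructor of priority 65535 (MAX_INIT_PRIORITY) lands in
+   section ".ctors$00000" under the Novell linker's "$" sub-section
+   separator, so the increasing sort places it first.  */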
+
+void
+nwld_named_section_asm_out_destructor (rtx symbol, int priority)
+{
+#if !SUPPORTS_INIT_PRIORITY
+ const char section[] = ".dtors"TARGET_SUB_SECTION_SEPARATOR;
+#else
+ char section[20];
+
+ sprintf (section, ".dtors"TARGET_SUB_SECTION_SEPARATOR"%.5u",
+ /* Invert the numbering so the linker puts us in the proper
+ order; destructors are run from left to right, and the
+ linker sorts in increasing order. */
+ MAX_INIT_PRIORITY - priority);
+#endif
+
+ switch_to_section (get_section (section, 0, NULL));
+ assemble_align (POINTER_SIZE);
+ assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
+}
diff --git a/gcc-4.4.0/gcc/config/i386/nwld.h b/gcc-4.4.0/gcc/config/i386/nwld.h
new file mode 100644
index 000000000..be41e96e1
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/nwld.h
@@ -0,0 +1,62 @@
+/* nwld.h -- defines to be used when targeting GCC for some generic NetWare
+ system while using the Novell linker.
+ Copyright (C) 2004, 2007 Free Software Foundation, Inc.
+
+ Written by Jan Beulich (jbeulich@novell.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc --def-file libc.def%s"
+
+#undef LIBGCC_SPEC
+#define LIBGCC_SPEC "-lgcc %{!static-libgcc:--def-file libgcc.def%s}"
+
+#undef LINKER_NAME
+#define LINKER_NAME "nwld"
+
+#undef LINK_SPEC
+#define LINK_SPEC "--format:NLM --extensions:GNU" \
+ " %{static:%{!nostdlib:%{!nodefaultlib:%eStatic linking is not supported.\n}}}"
+
+#undef LINK_GCC_C_SEQUENCE_SPEC
+#define LINK_GCC_C_SEQUENCE_SPEC "%L %G"
+
+/* In order to permit the linker to derive the output filename from the first
+ input file, put the common startup code as the last object. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC ""
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "crt0%O%s ../imports/%{!posix:libc}%{posix:posix}pre.gcc%O%s" \
+ " --def-file %{!posix:libc}%{posix:posix}pre.def%s"
+
+#define DRIVER_SELF_SPECS "%{!static-libgcc:-shared-libgcc}"
+
+#define TARGET_SUB_SECTION_SEPARATOR "$"
+
+void nwld_named_section_asm_out_constructor (rtx, int);
+void nwld_named_section_asm_out_destructor (rtx, int);
+
+#define TARGET_ASM_CONSTRUCTOR nwld_named_section_asm_out_constructor
+#define TARGET_ASM_DESTRUCTOR nwld_named_section_asm_out_destructor
+
+#undef EH_FRAME_SECTION_NAME
+#define EH_FRAME_SECTION_NAME ".eh_frame"TARGET_SUB_SECTION_SEPARATOR
+
+/* nwld does not currently support stabs debug info */
+#undef DBX_DEBUGGING_INFO
diff --git a/gcc-4.4.0/gcc/config/i386/openbsd.h b/gcc-4.4.0/gcc/config/i386/openbsd.h
new file mode 100644
index 000000000..d64f15907
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/openbsd.h
@@ -0,0 +1,101 @@
+/* Configuration for an OpenBSD i386 target.
+ Copyright (C) 1999, 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (OpenBSD/i386)");
+
+/* This goes away when the math-emulator is fixed. */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387)
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__unix__"); \
+ builtin_define ("__OpenBSD__"); \
+ builtin_assert ("system=unix"); \
+ builtin_assert ("system=bsd"); \
+ builtin_assert ("system=OpenBSD"); \
+ } \
+ while (0)
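+
+/* The effect can be checked with "echo | gcc -E -dM -" on this target,
+   which lists __unix__ and __OpenBSD__ among the predefined macros. */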
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+/* Assembler format: overall framework. */
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+/* Stack & calling: aggregate returns. */
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Assembler format: alignment output. */
+
+/* Kludgy test: when gas is upgraded, it will have p2align, and no problems
+ with nops. */
+#ifndef HAVE_GAS_MAX_SKIP_P2ALIGN
+/* i386 OpenBSD still uses an older gas that doesn't insert nops by default
+ when the .align directive demands to insert extra space in the text
+ segment. */
+#undef ASM_OUTPUT_ALIGN
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG)!=0) fprintf ((FILE), "\t.align %d,0x90\n", (LOG))
+#endif
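+
+/* So, for instance, ASM_OUTPUT_ALIGN (file, 4) emits "\t.align 4,0x90",
+   padding the text section with nop (0x90) bytes rather than zeros. */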
+
+/* Stack & calling: profiling. */
+
+/* OpenBSD's profiler recovers all information from the stack pointer.
+ The icky part is not here, but in machine/profile.h. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fputs (flag_pic ? "\tcall mcount@PLT\n": "\tcall mcount\n", FILE);
+
+/* Assembler format: exception region output. */
+
+/* All configurations that don't use elf must be explicit about not using
+ dwarf unwind information. */
+#define DWARF2_UNWIND_INFO 0
+
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START ";#"
+
+/* OpenBSD gas currently does not support quad, so do not use it. */
+#undef ASM_QUAD
diff --git a/gcc-4.4.0/gcc/config/i386/openbsdelf.h b/gcc-4.4.0/gcc/config/i386/openbsdelf.h
new file mode 100644
index 000000000..c76d26e26
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/openbsdelf.h
@@ -0,0 +1,131 @@
+/* Configuration for an OpenBSD i386 target.
+
+ Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This gets defined in tm.h->linux.h->svr4.h, and keeps us from using
+ libraries compiled with the native cc, so undef it. */
+#undef NO_DOLLAR_IN_LABEL
+
+/* Override the default comment-starter of "/". */
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
+
+/* This goes away when the math-emulator is fixed. */
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_NO_FANCY_MATH_387)
+
+/* Run-time target specifications */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ OPENBSD_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+/* As an elf system, we need crtbegin/crtend stuff. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC "\
+ %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} %{!p:crt0%O%s}} \
+ crtbegin%O%s} %{shared:crtbeginS%O%s}"
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}"
+
+/* Layout of source language data types. */
+
+/* This must agree with <machine/ansi.h> */
+#undef SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE "int"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE BITS_PER_WORD
+
+/* Assembler format: overall framework. */
+
+#undef ASM_APP_ON
+#define ASM_APP_ON "#APP\n"
+
+#undef ASM_APP_OFF
+#define ASM_APP_OFF "#NO_APP\n"
+
+#undef SET_ASM_OP
+#define SET_ASM_OP "\t.set\t"
+
+/* The following macros were originally stolen from i386v4.h.
+ These have to be defined to get PIC code correct. */
+
+/* Assembler format: dispatch tables. */
+
+/* Assembler format: sections. */
+
+/* Stack & calling: aggregate returns. */
+
+/* Don't default to pcc-struct-return, because gcc is the only compiler, and
+ we want to retain compatibility with older gcc versions. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+/* Assembler format: alignment output. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ if ((LOG) != 0) {\
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ }
+#endif
+
+/* Stack & calling: profiling. */
+
+/* OpenBSD's profiler recovers all information from the stack pointer.
+ The icky part is not here, but in machine/profile.h. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE, LABELNO) \
+ fputs (flag_pic ? "\tcall __mcount@PLT\n": "\tcall __mcount\n", FILE);
+
+/* Assembler format: exception region output. */
+
+/* Our configuration still doesn't handle dwarf2 correctly. */
+#define DWARF2_UNWIND_INFO 0
+
+/* Assembler format: alignment output. */
+
+/* Note that we pick up ASM_OUTPUT_MAX_SKIP_ALIGN from i386/gas.h */
+
+/* Note that we pick up ASM_OUTPUT_MI_THUNK from unix.h. */
+
+#undef LINK_SPEC
+#define LINK_SPEC \
+ "%{!shared:%{!nostdlib:%{!r*:%{!e*:-e __start}}}} \
+ %{shared:-shared} %{R*} \
+ %{static:-Bstatic} \
+ %{!static:-Bdynamic} \
+ %{assert*} \
+ %{!dynamic-linker:-dynamic-linker /usr/libexec/ld.so}"
+
+#define OBSD_HAS_CORRECT_SPECS
diff --git a/gcc-4.4.0/gcc/config/i386/pentium.md b/gcc-4.4.0/gcc/config/i386/pentium.md
new file mode 100644
index 000000000..c6c5bd55f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/pentium.md
@@ -0,0 +1,306 @@
+;; Pentium Scheduling
+;; Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+;;
+;; The Pentium is an in-order core with two integer pipelines.
+
+;; True for insns that behave like prefixed insns on the Pentium.
+(define_attr "pent_prefix" "false,true"
+ (if_then_else (ior (eq_attr "prefix_0f" "1")
+ (ior (eq_attr "prefix_data16" "1")
+ (eq_attr "prefix_rep" "1")))
+ (const_string "true")
+ (const_string "false")))
+
+;; Categorize how an instruction slots.
+
+;; The non-MMX Pentium slots an instruction with prefixes on the U pipe
+;; only, while the MMX Pentium can slot it on either U or V. We model the
+;; non-MMX Pentium rules, because that results in noticeably better code
+;; on the non-MMX Pentium and doesn't hurt much on MMX. (Prefixed
+;; instructions are not very common, so the scheduler usually has a
+;; non-prefixed insn to pair.)
+
+(define_attr "pent_pair" "uv,pu,pv,np"
+ (cond [(eq_attr "imm_disp" "true")
+ (const_string "np")
+ (ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec")
+ (and (eq_attr "type" "pop,push")
+ (eq_attr "memory" "!both")))
+ (if_then_else (eq_attr "pent_prefix" "true")
+ (const_string "pu")
+ (const_string "uv"))
+ (eq_attr "type" "ibr")
+ (const_string "pv")
+ (and (eq_attr "type" "ishift")
+ (match_operand 2 "const_int_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "rotate")
+ (match_operand 2 "const1_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "ishift1")
+ (match_operand 1 "const_int_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "rotate1")
+ (match_operand 1 "const1_operand" ""))
+ (const_string "pu")
+ (and (eq_attr "type" "call")
+ (match_operand 0 "constant_call_address_operand" ""))
+ (const_string "pv")
+ (and (eq_attr "type" "callv")
+ (match_operand 1 "constant_call_address_operand" ""))
+ (const_string "pv")
+ ]
+ (const_string "np")))
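+
+;; For example, under the rules above a plain register-register add is
+;; "uv" (pairable in either pipe), a shift by a constant amount is "pu"
+;; (U pipe only), a branch is "pv" (V pipe only), and an insn with both
+;; an immediate and a displacement is "np" (not pairable at all).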
+
+(define_automaton "pentium,pentium_fpu")
+
+;; The Pentium has U and V pipes. Instructions are always issued to both
+;; pipes together, much like on a VLIW machine.
+;;
+;; predecode
+;; / \
+;; decodeu decodev
+;; / | |
+;; fpu executeu executev
+;; | | |
+;; fpu retire retire
+;; |
+;; fpu
+;; We add dummy "port" pipes, allocated only in the first cycle of an
+;; instruction, to specify this behavior.
+
+(define_cpu_unit "pentium-portu,pentium-portv" "pentium")
+(define_cpu_unit "pentium-u,pentium-v" "pentium")
+(absence_set "pentium-portu" "pentium-u,pentium-v")
+(presence_set "pentium-portv" "pentium-portu")
+
+;; Floating point instructions can overlap with newly issued integer
+;; instructions. We model only the first cycle of the FP pipeline, as it
+;; is fully pipelined.
+(define_cpu_unit "pentium-fp" "pentium_fpu")
+
+;; There is a non-pipelined multiplier unit used for complex operations.
+(define_cpu_unit "pentium-fmul" "pentium_fpu")
+
+;; The Pentium preserves memory ordering, so when a load-execute-store
+;; instruction executes together with another instruction that loads
+;; data, execution of the other instruction is delayed until the very
+;; last cycle of the first one, when the data are bypassed.
+;; We model this by allocating the "memory" unit while a store is
+;; pending and making the load units conflict with it.
+
+(define_cpu_unit "pentium-memory" "pentium")
+(define_cpu_unit "pentium-load0" "pentium")
+(define_cpu_unit "pentium-load1" "pentium")
+(absence_set "pentium-load0,pentium-load1" "pentium-memory")
+
+(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
+(define_reservation "pentium-np" "(pentium-u + pentium-v)")
+(define_reservation "pentium-uv" "(pentium-u | pentium-v)")
+(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
+(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
+(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
+(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
+(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
+(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
+(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
+ + pentium-memory)")
+(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv
+ + pentium-memory)")
+(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
+ + pentium-memory)
+ | (pentium-firstv,pentium-v,
+ (pentium-load+pentium-firstv))")
+
+;; A few common long-latency instructions
+(define_insn_reservation "pent_mul" 11
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "imul"))
+ "pentium-np*11")
+
+(define_insn_reservation "pent_str" 12
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "str"))
+ "pentium-np*12")
+
+;; Integer division and some other long-latency instructions block all
+;; units, including the FP pipe. There is no value in modeling the
+;; latency of these instructions, and not modeling it decreases the
+;; size of the DFA.
+(define_insn_reservation "pent_block" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "idiv"))
+ "pentium-np+pentium-fp")
+
+;; Moves usually have a latency of one cycle, but there are exceptions.
+(define_insn_reservation "pent_fmov" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none,load")))
+ "(pentium-fp+pentium-np)")
+
+(define_insn_reservation "pent_fpmovxf" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (and (eq_attr "memory" "load,store")
+ (eq_attr "mode" "XF"))))
+ "(pentium-fp+pentium-np)*3")
+
+(define_insn_reservation "pent_fpstore" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "fmov")
+ (ior (match_operand 1 "immediate_operand" "")
+ (eq_attr "memory" "store"))))
+ "(pentium-fp+pentium-np)*2")
+
+(define_insn_reservation "pent_imov" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "imov"))
+ "pentium-firstuv")
+
+;; Push and pop instructions have a 1 cycle latency, and a special
+;; hardware bypass allows them to be paired with other push, pop
+;; and call instructions.
+(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
+(define_insn_reservation "pent_push" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "type" "push")
+ (eq_attr "memory" "store")))
+ "pentium-firstuv")
+
+(define_insn_reservation "pent_pop" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "pop,leave"))
+ "pentium-firstuv")
+
+;; Call and branch instructions can execute in either pipe, but
+;; they are only pairable when in the V pipe.
+(define_insn_reservation "pent_call" 10
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "call,callv"))
+ "pentium-firstv,pentium-v*9")
+
+(define_insn_reservation "pent_branch" 1
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "ibr"))
+ "pentium-firstv")
+
+;; Floating point instructions dispatch in the U pipe, but continue
+;; in the FP pipeline, allowing other instructions to execute.
+(define_insn_reservation "pent_fp" 3
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fop,fistp"))
+ "(pentium-firstu+pentium-fp),nothing,nothing")
+
+;; The first two cycles of fmul are not pipelined.
+(define_insn_reservation "pent_fmul" 3
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fmul"))
+ "(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
+
+;; Long-latency FP instructions overlap with integer instructions, but
+;; only their last 2 cycles overlap with FP ones.
+(define_insn_reservation "pent_fdiv" 39
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fdiv"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*36,pentium-fmul*2")
+
+(define_insn_reservation "pent_fpspc" 70
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "fpspc"))
+ "(pentium-np+pentium-fp+pentium-fmul),
+ (pentium-fp+pentium-fmul)*67,pentium-fmul*2")
+
+;; Integer instructions. Load/execute/store takes 3 cycles,
+;; load/execute takes 2 cycles, and execute alone takes one cycle.
+(define_insn_reservation "pent_uv_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "both")))
+ "pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
+
+(define_insn_reservation "pent_u_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "both")))
+ "pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
+
+(define_insn_reservation "pent_v_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "both")))
+ "pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
+
+(define_insn_reservation "pent_np_both" 3
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "both")))
+ "pentium-np,pentium-np,pentium-np")
+
+(define_insn_reservation "pent_uv_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "load")))
+ "pentium-firstuvload,pentium-uv")
+
+(define_insn_reservation "pent_u_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "load")))
+ "pentium-firstuload,pentium-u")
+
+(define_insn_reservation "pent_v_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "load")))
+ "pentium-firstvload,pentium-v")
+
+(define_insn_reservation "pent_np_load" 2
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "load")))
+ "pentium-np,pentium-np")
+
+(define_insn_reservation "pent_uv" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "uv")
+ (eq_attr "memory" "none")))
+ "pentium-firstuv")
+
+(define_insn_reservation "pent_u" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pu")
+ (eq_attr "memory" "none")))
+ "pentium-firstu")
+
+(define_insn_reservation "pent_v" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "pv")
+ (eq_attr "memory" "none")))
+ "pentium-firstv")
+
+(define_insn_reservation "pent_np" 1
+ (and (eq_attr "cpu" "pentium")
+ (and (eq_attr "pent_pair" "np")
+ (eq_attr "memory" "none")))
+ "pentium-np")
+
diff --git a/gcc-4.4.0/gcc/config/i386/pmm_malloc.h b/gcc-4.4.0/gcc/config/i386/pmm_malloc.h
new file mode 100644
index 000000000..0a9f2e227
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/pmm_malloc.h
@@ -0,0 +1,57 @@
+/* Copyright (C) 2004, 2006, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+ may not be visible. */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment)
+{
+ void *ptr;
+ if (alignment == 1)
+ return malloc (size);
+ if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
+ alignment = sizeof (void *);
+ if (posix_memalign (&ptr, alignment, size) == 0)
+ return ptr;
+ else
+ return NULL;
+}
+
+static __inline void
+_mm_free (void * ptr)
+{
+ free (ptr);
+}
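+
+/* Usage sketch (illustrative, not part of the original header): obtain a
+   16-byte aligned buffer suitable for SSE loads, then release it:
+
+     float *buf = (float *) _mm_malloc (64 * sizeof (float), 16);
+     if (buf)
+       {
+         ... use the buffer, e.g. with _mm_load_ps (buf) ...
+         _mm_free (buf);
+       }
+*/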
+
+#endif /* _MM_MALLOC_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/pmmintrin.h b/gcc-4.4.0/gcc/config/i386/pmmintrin.h
new file mode 100644
index 000000000..c5c9ae27c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/pmmintrin.h
@@ -0,0 +1,128 @@
+/* Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _PMMINTRIN_H_INCLUDED
+#define _PMMINTRIN_H_INCLUDED
+
+#ifndef __SSE3__
+# error "SSE3 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE2 and SSE header files. */
+#include <emmintrin.h>
+
+/* Additional bits in the MXCSR. */
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#define _MM_DENORMALS_ZERO_ON 0x0040
+#define _MM_DENORMALS_ZERO_OFF 0x0000
+
+#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
+ _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
+#define _MM_GET_DENORMALS_ZERO_MODE() \
+ (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
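+
+/* Usage sketch (illustrative): flush denormal inputs to zero around a
+   numeric kernel and restore the previous mode afterwards:
+
+     unsigned int saved = _MM_GET_DENORMALS_ZERO_MODE ();
+     _MM_SET_DENORMALS_ZERO_MODE (_MM_DENORMALS_ZERO_ON);
+     ... run the kernel ...
+     _MM_SET_DENORMALS_ZERO_MODE (saved);  */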
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_ps (__m128 __X, __m128 __Y)
+{
+ return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehdup_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_moveldup_ps (__m128 __X)
+{
+ return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_addsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pd (__m128d __X, __m128d __Y)
+{
+ return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loaddup_pd (double const *__P)
+{
+ return _mm_load1_pd (__P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movedup_pd (__m128d __X)
+{
+ return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_lddqu_si128 (__m128i const *__P)
+{
+ return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_monitor (__P, __E, __H);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mwait (unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_mwait (__E, __H);
+}
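+
+/* Usage sketch (illustrative, compile with -msse3): a horizontal sum of
+   four floats built from the hadd intrinsic defined above:
+
+     __m128 v = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
+     v = _mm_hadd_ps (v, v);          now {3, 7, 3, 7}
+     v = _mm_hadd_ps (v, v);          every lane now holds 10.0f
+     float sum;
+     _mm_store_ss (&sum, v);          sum == 10.0f
+*/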
+
+#endif /* __SSE3__ */
+
+#endif /* _PMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/ppro.md b/gcc-4.4.0/gcc/config/i386/ppro.md
new file mode 100644
index 000000000..5e163d829
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/ppro.md
@@ -0,0 +1,758 @@
+;; Scheduling for the Intel P6 family of processors
+;; Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>. */
+
+;; The P6 family includes the Pentium Pro, Pentium II, Pentium III, Celeron
+;; and Xeon lines of CPUs. The DFA scheduler description in this file is
+;; based on information that can be found in the following three documents:
+;;
+;; "P6 Family of Processors Hardware Developer's Manual",
+;; Intel, September 1999.
+;;
+;; "Intel Architecture Optimization Manual",
+;; Intel, 1999 (Order Number: 245127-001).
+;;
+;; "How to optimize for the Pentium family of microprocessors",
+;; by Agner Fog, PhD.
+;;
+;; The P6 pipeline has three major components:
+;; 1) the FETCH/DECODE unit, an in-order issue front-end
+;; 2) the DISPATCH/EXECUTE unit, which is the out-of-order core
+;; 3) the RETIRE unit, an in-order retirement unit
+;;
+;; So, the P6 CPUs have out-of-order cores, but the instruction decoder and
+;; retirement unit are naturally in-order.
+;;
+;; BUS INTERFACE UNIT
+;; / \
+;; L1 ICACHE L1 DCACHE
+;; / | \ | \
+;; DECODER0 DECODER1 DECODER2 DISP/EXEC RETIRE
+;; \ | / | |
+;; INSTRUCTION POOL __________|_______/
+;; (inc. reorder buffer)
+;;
+;; Since the P6 CPUs execute instructions out-of-order, the most important
+;; consideration in performance tuning is making sure enough micro-ops are
+;; ready for execution in the out-of-order core, while not stalling the
+;; decoder.
+;;
+;; TODO:
+;; - Find a less crude way to model complex instructions, in
+;; particular how many cycles they take to be decoded.
+;; - Include decoder latencies in the total reservation latencies.
+;; This isn't necessary right now because we assume for every
+;; instruction that it never blocks a decoder.
+;; - Figure out where the p0 and p1 reservations come from. These
+;;   appear not to be in the manual.
+;; - Lots more because I'm sure this is still far from optimal :-)
+
+;; The ppro_idiv and ppro_fdiv automata are used to model issue
+;; latencies of idiv and fdiv type insns.
+(define_automaton "ppro_decoder,ppro_core,ppro_idiv,ppro_fdiv,ppro_load,ppro_store")
+
+;; Simple instructions of the register-register form have only one uop.
+;; Load instructions are also only one uop. Store instructions decode to
+;; two uops, and simple read-modify instructions also take two uops.
+;; Simple instructions of the register-memory form have two to three uops.
+;; Simple read-modify-write instructions have four uops. The rules for
+;; the decoder are simple:
+;; - an instruction with 1 uop can be decoded by any of the three
+;; decoders in one cycle.
+;; - an instruction with 1 to 4 uops can be decoded only by decoder 0
+;; but still in only one cycle.
+;; - a complex (microcode) instruction can also only be decoded by
+;; decoder 0, and this takes an unspecified number of cycles.
+;;
+;; The goal is to schedule such that we have a 4-1-1 uop sequence
+;; in each cycle, to decode as many instructions per cycle as possible.
+(define_cpu_unit "decoder0" "ppro_decoder")
+(define_cpu_unit "decoder1" "ppro_decoder")
+(define_cpu_unit "decoder2" "ppro_decoder")
+
+;; We first wish to find an instruction for decoder0, so exclude
+;; decoder1 and decoder2 from being reserved until decoder 0 is
+;; reserved.
+(presence_set "decoder1" "decoder0")
+(presence_set "decoder2" "decoder0")
+
+;; Most instructions can be decoded on any of the three decoders.
+(define_reservation "decodern" "(decoder0|decoder1|decoder2)")
+
+;; The out-of-order core has five pipelines. During each cycle, the core
+;; may dispatch zero or one uop on the port of any of the five pipelines,
+;; so the maximum number of dispatched uops per cycle is 5. In practice,
+;; 3 uops per cycle is more realistic.
+;;
+;; Two of the five pipelines contain several execution units:
+;;
+;; Port 0 Port 1 Port 2 Port 3 Port 4
+;; ALU ALU LOAD SAC SDA
+;; FPU JUE
+;; AGU MMX
+;; MMX P3FPU
+;; P3FPU
+;;
+;; (SAC=Store Address Calculation, SDA=Store Data Unit, P3FPU = SSE unit,
+;; JUE = Jump Execution Unit, AGU = Address Generation Unit)
+;;
+(define_cpu_unit "p0,p1" "ppro_core")
+(define_cpu_unit "p2" "ppro_load")
+(define_cpu_unit "p3,p4" "ppro_store")
+(define_cpu_unit "idiv" "ppro_idiv")
+(define_cpu_unit "fdiv" "ppro_fdiv")
+
+;; Only the irregular instructions have to be modeled here. A load
+;; increases the latency by 2 or 3, or by nothing if the manual gives
+;; a latency already. Store latencies are not accounted for.
+;;
+;; The simple instructions follow a very regular pattern of 1 uop per
+;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store
+;; on port 4 and port 3. These instructions are modelled at the bottom
+;; of this file.
+;;
+;; For microcoded instructions we don't know how many uops are produced.
+;; These instructions are the "complex" ones in the Intel manuals. All
+;; we _do_ know is that they typically produce four or more uops, so
+;; they can only be decoded on decoder0. Modelling their latencies
+;; doesn't make sense because we don't know how these instructions are
+;; executed in the core. So we just model that they can only be decoded
+;; on decoder 0, and say that it takes a little while before the result
+;; is available.
+(define_insn_reservation "ppro_complex_insn" 6
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "other,multi,call,callv,str"))
+ "decoder0")
+
+;; imov with memory operands does not use the integer units.
+(define_insn_reservation "ppro_imov" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imov")))
+ "decodern,(p0|p1)")
+
+(define_insn_reservation "ppro_imov_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imov")))
+ "decodern,p2")
+
+(define_insn_reservation "ppro_imov_store" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "imov")))
+ "decoder0,p4+p3")
+
+;; imovx always decodes to one uop, and also doesn't use the integer
+;; units if it has memory operands.
+(define_insn_reservation "ppro_imovx" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imovx")))
+ "decodern,(p0|p1)")
+
+(define_insn_reservation "ppro_imovx_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "imovx")))
+ "decodern,p2")
+
+;; lea executes on port 0 with latency one and throughput 1.
+(define_insn_reservation "ppro_lea" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "lea")))
+ "decodern,p0")
+
+;; Shift and rotate execute on port 0 with latency and throughput 1.
+;; The load and store units need to be reserved when memory operands
+;; are involved.
+(define_insn_reservation "ppro_shift_rotate" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_shift_rotate_mem" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
+ "decoder0,p2+p0,p4+p3")
+
+
+;; The P6 has a sophisticated branch prediction mechanism to minimize
+;; latencies due to branching. In particular, it has a fast way to
+;; execute branches that are taken multiple times (such as in loops).
+;; Branches not taken suffer no penalty, and correctly predicted
+;; branches cost only one fetch cycle. Mispredicted branches are very
+;; costly: typically 15 cycles and possibly as many as 26 cycles.
+;;
+;; Unfortunately all this makes it quite difficult to properly model
+;; the latencies for the compiler. Here I've made the choice to be
+;; optimistic and assume branches are often predicted correctly, so
+;; they have latency 1, and the decoders are not blocked.
+;;
+;; In addition, the model assumes a branch always decodes to only 1 uop,
+;; which is not exactly true because there are a few instructions that
+;; decode to 2 uops or microcode. But this probably gives the best
+;; results because we can assume these instructions can decode on all
+;; decoders.
+(define_insn_reservation "ppro_branch" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "ibr")))
+ "decodern,p1")
+
+;; ??? Indirect branches probably have worse latency than this.
+(define_insn_reservation "ppro_indirect_branch" 6
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "ibr")))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_leave" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "leave"))
+ "decoder0,p2+(p0|p1),(p0|p1)")
+
+;; imul has throughput one, but latency 4, and can only execute on port 0.
+(define_insn_reservation "ppro_imul" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "imul")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_imul_mem" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "type" "imul")))
+ "decoder0,p2+p0")
+
+;; div and idiv are very similar, so we model them the same.
+;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
+;; These issue latencies are modelled via the ppro_idiv automaton.
+(define_insn_reservation "ppro_idiv_QI" 19
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*2,(p0|p1)+idiv,idiv*9")
+
+(define_insn_reservation "ppro_idiv_QI_load" 19
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "QI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*9")
+
+(define_insn_reservation "ppro_idiv_HI" 23
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*17")
+
+(define_insn_reservation "ppro_idiv_HI_load" 23
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "HI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*18")
+
+(define_insn_reservation "ppro_idiv_SI" 39
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,(p0+idiv)*3,(p0|p1)+idiv,idiv*33")
+
+(define_insn_reservation "ppro_idiv_SI_load" 39
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "idiv"))))
+ "decoder0,p2+p0+idiv,p0+idiv,(p0|p1)+idiv,idiv*34")
+
+;; Floating point operations always execute on port 0.
+;; ??? Where do these latencies come from? fadd has latency 3 and
+;; has throughput "1/cycle (align with FADD)". What do they
+;; mean and how can we model that?
+(define_insn_reservation "ppro_fop" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "fop")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fop_load" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fop")))
+ "decoder0,p2+p0,p0")
+
+(define_insn_reservation "ppro_fop_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "fop")))
+ "decoder0,p0,p0,p0+p4+p3")
+
+(define_insn_reservation "ppro_fop_both" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "fop")))
+ "decoder0,p2+p0,p0+p4+p3")
+
+(define_insn_reservation "ppro_fsgn" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "fsgn"))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fistp" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "fistp"))
+ "decoder0,p0*2,p4+p3")
+
+(define_insn_reservation "ppro_fcmov" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (eq_attr "type" "fcmov"))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_fcmp" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fcmp")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fcmp_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fcmp")))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_fmov" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmov")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fmov_load" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "decodern,p2")
+
+(define_insn_reservation "ppro_fmov_XF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "decoder0,(p2+p0)*2")
+
+(define_insn_reservation "ppro_fmov_store" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "!XF")
+ (eq_attr "type" "fmov"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_fmov_XF_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fmov"))))
+ "decoder0,(p0+p4),(p0+p3)")
+
+;; fmul executes on port 0 with latency 5. It has issue latency 2,
+;; but we don't model this.
+(define_insn_reservation "ppro_fmul" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "fmul")))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_fmul_load" 6
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "fmul")))
+ "decoder0,p2+p0,p0")
+
+;; fdiv latencies depend on the mode of the operands. XFmode gives
+;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
+;; Division by a power of 2 takes only 9 cycles, but we cannot model
+;; that. Throughput is equal to latency - 1, which we model using the
+;; ppro_fdiv automaton.
+(define_insn_reservation "ppro_fdiv_SF" 18
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*16")
+
+(define_insn_reservation "ppro_fdiv_SF_load" 19
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*16")
+
+(define_insn_reservation "ppro_fdiv_DF" 32
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*30")
+
+(define_insn_reservation "ppro_fdiv_DF_load" 33
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "DF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*30")
+
+(define_insn_reservation "ppro_fdiv_XF" 38
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decodern,p0+fdiv,fdiv*36")
+
+(define_insn_reservation "ppro_fdiv_XF_load" 39
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "XF")
+ (eq_attr "type" "fdiv,fpspc"))))
+ "decoder0,p2+p0+fdiv,fdiv*36")
+
+;; MMX instructions can execute on either port 0 or port 1 with a
+;; throughput of 1/cycle.
+;; on port 0: - ALU (latency 1)
+;; - Multiplier Unit (latency 3)
+;; on port 1: - ALU (latency 1)
+;; - Shift Unit (latency 1)
+;;
+;; MMX instructions are either of the type reg-reg, or read-modify, and
+;; except for mmxshft and mmxmul they can execute on port 0 or port 1,
+;; so they behave as "simple" instructions that need no special modelling.
+;; We only have to model mmxshft and mmxmul.
+(define_insn_reservation "ppro_mmx_shft" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxshft")))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_mmx_shft_load" 2
+ (and (eq_attr "cpu" "pentiumpro")
+       (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxshft")))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_mmx_mul" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (eq_attr "type" "mmxmul")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_mmx_mul_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+       (and (eq_attr "memory" "load")
+ (eq_attr "type" "mmxmul")))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_sse_mmxcvt" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "DI")
+ (eq_attr "type" "mmxcvt")))
+ "decodern,p1")
+
+;; FIXME: These are Pentium III only, but we cannot tell here if
+;; we're generating code for PentiumPro/Pentium II or Pentium III
+;; (define_insn_reservation "ppro_sse_mmxshft" 2
+;; (and (eq_attr "cpu" "pentiumpro")
+;; (and (eq_attr "mode" "DI")
+;; (eq_attr "type" "mmxshft")))
+;; "decodern,p0")
+
+;; SSE is very complicated, and takes a bit more effort.
+;; ??? I assumed that all SSE instructions decode on decoder0,
+;; but is this correct?
+
+;; The sfence instruction.
+(define_insn_reservation "ppro_sse_sfence" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "unknown")
+ (eq_attr "type" "sse")))
+ "decoder0,p4+p3")
+
+;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
+(define_insn_reservation "ppro_sse_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sse")))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_add_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseadd"))))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_sse_add_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_sse_cmp_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1")
+
+(define_insn_reservation "ppro_sse_cmp_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p2+p1")
+
+(define_insn_reservation "ppro_sse_comi_SF" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecomi"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_comi_SF_load" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssecomi"))))
+ "decoder0,p2+p0")
+
+(define_insn_reservation "ppro_sse_mul_SF" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemul"))))
+ "decodern,p0")
+
+(define_insn_reservation "ppro_sse_mul_SF_load" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,p2+p0")
+
+;; FIXME: ssediv doesn't close p0 for 17 cycles, surely???
+(define_insn_reservation "ppro_sse_div_SF" 18
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,p0*17")
+
+(define_insn_reservation "ppro_sse_div_SF_load" 18
+ (and (eq_attr "cpu" "pentiumpro")
+       (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,(p2+p0),p0*16")
+
+(define_insn_reservation "ppro_sse_icvt_SF" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "sseicvt")))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_icvt_SI" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "SI")
+ (eq_attr "type" "sseicvt")))
+ "decoder0,(p2+p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p0|p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p2+(p0|p1)")
+
+(define_insn_reservation "ppro_sse_mov_SF_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p4+p3")
+
+(define_insn_reservation "ppro_sse_V4SF" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sse")))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_add_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_add_V4SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sseadd"))))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_cmp_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_cmp_V4SF_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecmp"))))
+ "decoder0,(p2+p1)*2")
+
+(define_insn_reservation "ppro_sse_cvt_V4SF" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none,unknown")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssecvt"))))
+ "decoder0,p1*2")
+
+(define_insn_reservation "ppro_sse_cvt_V4SF_other" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "!none,unknown")
+ (and (eq_attr "mode" "V4SF")
+	    (eq_attr "type" "ssecvt"))))
+ "decoder0,p1,p4+p3")
+
+(define_insn_reservation "ppro_sse_mul_V4SF" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,p0*2")
+
+(define_insn_reservation "ppro_sse_mul_V4SF_load" 5
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemul"))))
+ "decoder0,(p2+p0)*2")
+
+;; FIXME: p0 really closed this long???
+(define_insn_reservation "ppro_sse_div_V4SF" 48
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,p0*34")
+
+(define_insn_reservation "ppro_sse_div_V4SF_load" 48
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssediv"))))
+ "decoder0,(p2+p0)*2,p0*32")
+
+(define_insn_reservation "ppro_sse_log_V4SF" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sselog,sselog1"))))
+ "decodern,p1")
+
+(define_insn_reservation "ppro_sse_log_V4SF_load" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "sselog,sselog1"))))
+ "decoder0,(p2+p1)")
+
+(define_insn_reservation "ppro_sse_mov_V4SF" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p0|p1)*2")
+
+(define_insn_reservation "ppro_sse_mov_V4SF_load" 2
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,p2*2")
+
+(define_insn_reservation "ppro_sse_mov_V4SF_store" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (and (eq_attr "mode" "V4SF")
+ (eq_attr "type" "ssemov"))))
+ "decoder0,(p4+p3)*2")
+
+;; All other instructions are modelled as simple instructions.
+;; We have already modelled all i387 floating point instructions, so all
+;; other instructions execute on either port 0 or port 1. This includes
+;; the ALU units and the MMX units.
+;;
+;; reg-reg instructions produce 1 uop so they can be decoded on any of
+;; the three decoders.
+(define_insn_reservation "ppro_insn" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "none,unknown")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decodern,(p0|p1)")
+
+;; read-modify and register-memory instructions have two or three uops,
+;; so they have to be decoded on decoder0.
+(define_insn_reservation "ppro_insn_load" 3
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "load")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,p2+(p0|p1)")
+
+(define_insn_reservation "ppro_insn_store" 1
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "store")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,(p0|p1),p4+p3")
+
+;; read-modify-store instructions produce 4 uops so they have to be
+;; decoded on decoder0 as well.
+(define_insn_reservation "ppro_insn_both" 4
+ (and (eq_attr "cpu" "pentiumpro")
+ (and (eq_attr "memory" "both")
+ (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,icmov,push,pop,fxch,sseiadd,sseishft,sseimul,mmx,mmxadd,mmxcmp")))
+ "decoder0,p2+(p0|p1),p4+p3")
+
diff --git a/gcc-4.4.0/gcc/config/i386/predicates.md b/gcc-4.4.0/gcc/config/i386/predicates.md
new file mode 100644
index 000000000..f1c710333
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/predicates.md
@@ -0,0 +1,1130 @@
+;; Predicate definitions for IA-32 and x86-64.
+;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Return nonzero if OP is either an i387 or SSE fp register.
+(define_predicate "any_fp_register_operand"
+ (and (match_code "reg")
+ (match_test "ANY_FP_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is an i387 fp register.
+(define_predicate "fp_register_operand"
+ (and (match_code "reg")
+ (match_test "FP_REGNO_P (REGNO (op))")))
+
+;; Return nonzero if OP is a non-fp register_operand.
+(define_predicate "register_and_not_any_fp_reg_operand"
+ (and (match_code "reg")
+ (not (match_test "ANY_FP_REGNO_P (REGNO (op))"))))
+
+;; Return nonzero if OP is a register operand other than an i387 fp register.
+(define_predicate "register_and_not_fp_reg_operand"
+ (and (match_code "reg")
+ (not (match_test "FP_REGNO_P (REGNO (op))"))))
+
+;; True if the operand is an MMX register.
+(define_predicate "mmx_reg_operand"
+ (and (match_code "reg")
+ (match_test "MMX_REGNO_P (REGNO (op))")))
+
+;; True if the operand is a Q_REGS class register.
+(define_predicate "q_regs_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return ANY_QI_REG_P (op);
+})
+
+;; Match an SI or HImode register for a zero_extract.
+(define_special_predicate "ext_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if ((!TARGET_64BIT || GET_MODE (op) != DImode)
+ && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
+ return 0;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ /* Be careful to accept only registers having upper parts. */
+ return REGNO (op) > LAST_VIRTUAL_REGISTER || REGNO (op) < 4;
+})
+
+;; Return true if op is the AX register.
+(define_predicate "ax_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == 0")))
+
+;; Return true if op is the flags register.
+(define_predicate "flags_reg_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == FLAGS_REG")))
+
+;; Return true if op is not xmm0 register.
+(define_predicate "reg_not_xmm0_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) != FIRST_SSE_REG")))
+
+;; As above, but allow nonimmediate operands.
+(define_predicate "nonimm_not_xmm0_operand"
+ (and (match_operand 0 "nonimmediate_operand")
+ (match_test "GET_CODE (op) != REG
+ || REGNO (op) != FIRST_SSE_REG")))
+
+;; Return 1 if VALUE can be stored in a sign extended immediate field.
+(define_predicate "x86_64_immediate_operand"
+ (match_code "const_int,symbol_ref,label_ref,const")
+{
+ if (!TARGET_64BIT)
+ return immediate_operand (op, mode);
+
+ switch (GET_CODE (op))
+ {
+ case CONST_INT:
+      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
+	 to be at least 32, and thus all acceptable constants are
+	 represented as CONST_INT. */
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return 1;
+ else
+ {
+ HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (op), DImode);
+ return trunc_int_for_mode (val, SImode) == val;
+ }
+ break;
+
+ case SYMBOL_REF:
+      /* For certain code models, the symbolic references are known to fit.
+	 In the CM_SMALL_PIC model we know it fits if it is local to the
+	 shared library. Don't count TLS SYMBOL_REFs here, since they
+	 should fit only when inside one of the UNSPECs handled below. */
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+ return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
+ || (ix86_cmodel == CM_MEDIUM && !SYMBOL_REF_FAR_ADDR_P (op)));
+
+ case LABEL_REF:
+ /* For certain code models, the code is near as well. */
+ return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
+ || ix86_cmodel == CM_KERNEL);
+
+ case CONST:
+ /* We also may accept the offsetted memory references in certain
+ special cases. */
+ if (GET_CODE (XEXP (op, 0)) == UNSPEC)
+ switch (XINT (XEXP (op, 0), 1))
+ {
+ case UNSPEC_GOTPCREL:
+ case UNSPEC_DTPOFF:
+ case UNSPEC_GOTNTPOFF:
+ case UNSPEC_NTPOFF:
+ return 1;
+ default:
+ break;
+ }
+
+ if (GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx op1 = XEXP (XEXP (op, 0), 0);
+ rtx op2 = XEXP (XEXP (op, 0), 1);
+ HOST_WIDE_INT offset;
+
+ if (ix86_cmodel == CM_LARGE)
+ return 0;
+ if (!CONST_INT_P (op2))
+ return 0;
+ offset = trunc_int_for_mode (INTVAL (op2), DImode);
+ switch (GET_CODE (op1))
+ {
+ case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op1))
+ return 0;
+	      /* For CM_SMALL assume that the latest object is 16MB below
+		 the end of the 31-bit address boundary. We may also accept
+		 pretty large negative constants, knowing that all objects
+		 are in the positive half of the address space. */
+ if ((ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op1)))
+ && offset < 16*1024*1024
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+	      /* For CM_KERNEL we know that all objects reside in the
+		 negative half of the 32-bit address space. We may not
+		 accept negative offsets, since they may be just off,
+		 but we may accept pretty large positive ones. */
+ if (ix86_cmodel == CM_KERNEL
+ && offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ break;
+
+ case LABEL_REF:
+ /* These conditions are similar to SYMBOL_REF ones, just the
+ constraints for code models differ. */
+ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
+ && offset < 16*1024*1024
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ if (ix86_cmodel == CM_KERNEL
+ && offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ break;
+
+ case UNSPEC:
+ switch (XINT (op1, 1))
+ {
+ case UNSPEC_DTPOFF:
+ case UNSPEC_NTPOFF:
+ if (offset > 0
+ && trunc_int_for_mode (offset, SImode) == offset)
+ return 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return 0;
+})
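+;; For example, on x86-64 (const_int -1) matches, since it can be
+;; sign-extended from a 32-bit immediate, while (const_int 4294967296)
+;; cannot.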
+
+;; Return 1 if VALUE can be stored in the zero extended immediate field.
+(define_predicate "x86_64_zext_immediate_operand"
+ (match_code "const_double,const_int,symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case CONST_DOUBLE:
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return (GET_MODE (op) == VOIDmode && !CONST_DOUBLE_HIGH (op));
+ else
+ return 0;
+
+ case CONST_INT:
+ if (HOST_BITS_PER_WIDE_INT == 32)
+ return INTVAL (op) >= 0;
+ else
+ return !(INTVAL (op) & ~(HOST_WIDE_INT) 0xffffffff);
+
+ case SYMBOL_REF:
+ /* For certain code models, the symbolic references are known to fit. */
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op))
+ return false;
+ return (ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op)));
+
+ case LABEL_REF:
+ /* For certain code models, the code is near as well. */
+ return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
+
+ case CONST:
+      /* We may also accept offsetted memory references in certain
+	 special cases.  */
+ if (GET_CODE (XEXP (op, 0)) == PLUS)
+ {
+ rtx op1 = XEXP (XEXP (op, 0), 0);
+ rtx op2 = XEXP (XEXP (op, 0), 1);
+
+ if (ix86_cmodel == CM_LARGE)
+ return 0;
+ switch (GET_CODE (op1))
+ {
+ case SYMBOL_REF:
+ /* TLS symbols are not constant. */
+ if (SYMBOL_REF_TLS_MODEL (op1))
+ return 0;
+ /* For small code model we may accept pretty large positive
+ offsets, since one bit is available for free. Negative
+ offsets are limited by the size of NULL pointer area
+ specified by the ABI. */
+ if ((ix86_cmodel == CM_SMALL
+ || (ix86_cmodel == CM_MEDIUM
+ && !SYMBOL_REF_FAR_ADDR_P (op1)))
+ && CONST_INT_P (op2)
+ && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
+ && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2))
+ return 1;
+ /* ??? For the kernel, we may accept adjustment of
+ -0x10000000, since we know that it will just convert
+ negative address space to positive, but perhaps this
+ is not worthwhile. */
+ break;
+
+ case LABEL_REF:
+ /* These conditions are similar to SYMBOL_REF ones, just the
+ constraints for code models differ. */
+ if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
+ && CONST_INT_P (op2)
+ && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
+ && trunc_int_for_mode (INTVAL (op2), SImode) == INTVAL (op2))
+ return 1;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return 0;
+})
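+;; For example, (const_int 4294967295) matches here, since it
+;; zero-extends from 32 bits, even though it fails
+;; x86_64_immediate_operand.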
+
+;; Return nonzero if OP is general operand representable on x86_64.
+(define_predicate "x86_64_general_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "x86_64_immediate_operand"))
+ (match_operand 0 "general_operand")))
+
+;; Return nonzero if OP is general operand representable on x86_64
+;; as either sign extended or zero extended constant.
+(define_predicate "x86_64_szext_general_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "nonimmediate_operand")
+ (ior (match_operand 0 "x86_64_immediate_operand")
+ (match_operand 0 "x86_64_zext_immediate_operand")))
+ (match_operand 0 "general_operand")))
+
+;; Return nonzero if OP is nonmemory operand representable on x86_64.
+(define_predicate "x86_64_nonmemory_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "x86_64_immediate_operand"))
+ (match_operand 0 "nonmemory_operand")))
+
+;; Return nonzero if OP is nonmemory operand representable on x86_64.
+(define_predicate "x86_64_szext_nonmemory_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (ior (match_operand 0 "register_operand")
+ (ior (match_operand 0 "x86_64_immediate_operand")
+ (match_operand 0 "x86_64_zext_immediate_operand")))
+ (match_operand 0 "nonmemory_operand")))
+
+;; Return true when the operand is a PIC expression that can be computed
+;; by an lea operation.
+(define_predicate "pic_32bit_operand"
+ (match_code "const,symbol_ref,label_ref")
+{
+ if (!flag_pic)
+ return 0;
+ /* Rule out relocations that translate into 64bit constants. */
+ if (TARGET_64BIT && GET_CODE (op) == CONST)
+ {
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS && CONST_INT_P (XEXP (op, 1)))
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == UNSPEC
+ && (XINT (op, 1) == UNSPEC_GOTOFF
+ || XINT (op, 1) == UNSPEC_GOT))
+ return 0;
+ }
+ return symbolic_operand (op, mode);
+})
+
+
+;; Return nonzero if OP is nonmemory operand acceptable by movabs patterns.
+(define_predicate "x86_64_movabs_operand"
+ (if_then_else (match_test "!TARGET_64BIT || !flag_pic")
+ (match_operand 0 "nonmemory_operand")
+ (ior (match_operand 0 "register_operand")
+ (and (match_operand 0 "const_double_operand")
+ (match_test "GET_MODE_SIZE (mode) <= 8")))))
+
+;; Returns nonzero if OP is either a symbol reference or a sum of a symbol
+;; reference and a constant.
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,label_ref,const")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+
+ case CONST:
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF
+ || (GET_CODE (op) == UNSPEC
+ && (XINT (op, 1) == UNSPEC_GOT
+ || XINT (op, 1) == UNSPEC_GOTOFF
+ || XINT (op, 1) == UNSPEC_GOTPCREL)))
+ return 1;
+ if (GET_CODE (op) != PLUS
+ || !CONST_INT_P (XEXP (op, 1)))
+ return 0;
+
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return 1;
+ /* Only @GOTOFF gets offsets. */
+ if (GET_CODE (op) != UNSPEC
+ || XINT (op, 1) != UNSPEC_GOTOFF)
+ return 0;
+
+ op = XVECEXP (op, 0, 0);
+ if (GET_CODE (op) == SYMBOL_REF
+ || GET_CODE (op) == LABEL_REF)
+ return 1;
+ return 0;
+
+ default:
+ gcc_unreachable ();
+ }
+})
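+;; For example, a plain (symbol_ref "foo") matches, as does
+;; (const (plus (symbol_ref "foo") (const_int 4))); "foo" is just an
+;; illustrative name.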
+
+;; Return true if the operand contains a @GOT or @GOTOFF reference.
+(define_predicate "pic_symbolic_operand"
+ (match_code "const")
+{
+ op = XEXP (op, 0);
+ if (TARGET_64BIT)
+ {
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) == UNSPEC_GOTPCREL)
+ return 1;
+ if (GET_CODE (op) == PLUS
+ && GET_CODE (XEXP (op, 0)) == UNSPEC
+ && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
+ return 1;
+ }
+ else
+ {
+ if (GET_CODE (op) == UNSPEC)
+ return 1;
+ if (GET_CODE (op) != PLUS
+ || !CONST_INT_P (XEXP (op, 1)))
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == UNSPEC
+ && XINT (op, 1) != UNSPEC_MACHOPIC_OFFSET)
+ return 1;
+ }
+ return 0;
+})
+
+;; Return true if OP is a symbolic operand that resolves locally.
+(define_predicate "local_symbolic_operand"
+ (match_code "const,label_ref,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ if (SYMBOL_REF_TLS_MODEL (op) != 0)
+ return 0;
+
+ if (SYMBOL_REF_LOCAL_P (op))
+ return 1;
+
+ /* There is, however, a not insubstantial body of code in the rest of
+ the compiler that assumes it can just stick the results of
+ ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
+  /* ??? This is a hack.  Should update the body of the compiler to
+     always create a DECL and invoke targetm.encode_section_info.  */
+ if (strncmp (XSTR (op, 0), internal_label_prefix,
+ internal_label_prefix_len) == 0)
+ return 1;
+
+ return 0;
+})
+
+;; Test for a legitimate @GOTOFF operand.
+;;
+;; VxWorks does not impose a fixed gap between segments; the run-time
+;; gap can be different from the object-file gap. We therefore can't
+;; use @GOTOFF unless we are absolutely sure that the symbol is in the
+;; same segment as the GOT. Unfortunately, the flexibility of linker
+;; scripts means that we can't be sure of that in general, so assume
+;; that @GOTOFF is never valid on VxWorks.
+(define_predicate "gotoff_operand"
+ (and (match_test "!TARGET_VXWORKS_RTP")
+ (match_operand 0 "local_symbolic_operand")))
+
+;; Test for various thread-local symbols.
+(define_predicate "tls_symbolic_operand"
+ (and (match_code "symbol_ref")
+ (match_test "SYMBOL_REF_TLS_MODEL (op) != 0")))
+
+(define_predicate "tls_modbase_operand"
+ (and (match_code "symbol_ref")
+ (match_test "op == ix86_tls_module_base ()")))
+
+(define_predicate "tp_or_register_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "unspec")
+ (match_test "XINT (op, 1) == UNSPEC_TP"))))
+
+;; Test for a pc-relative call operand
+(define_predicate "constant_call_address_operand"
+ (match_code "symbol_ref")
+{
+ if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
+ return false;
+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op))
+ return false;
+ return true;
+})
+
+;; True for any non-virtual or eliminable register. Used in places where
+;; instantiation of such a register may cause the pattern to not be recognized.
+(define_predicate "register_no_elim_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return !(op == arg_pointer_rtx
+ || op == frame_pointer_rtx
+ || IN_RANGE (REGNO (op),
+ FIRST_PSEUDO_REGISTER, LAST_VIRTUAL_REGISTER));
+})
+
+;; Similarly, but include the stack pointer. This is used to prevent esp
+;; from being used as an index reg.
+(define_predicate "index_register_operand"
+ (match_operand 0 "register_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ if (reload_in_progress || reload_completed)
+ return REG_OK_FOR_INDEX_STRICT_P (op);
+ else
+ return REG_OK_FOR_INDEX_NONSTRICT_P (op);
+})
+
+;; Return false if this is any eliminable register. Otherwise general_operand.
+(define_predicate "general_no_elim_operand"
+ (if_then_else (match_code "reg,subreg")
+ (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "general_operand")))
+
+;; Return false if this is any eliminable register. Otherwise
+;; register_operand or a constant.
+(define_predicate "nonmemory_no_elim_operand"
+ (ior (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "immediate_operand")))
+
+;; Test for a valid operand for a call instruction.
+(define_predicate "call_insn_operand"
+ (ior (match_operand 0 "constant_call_address_operand")
+ (ior (match_operand 0 "register_no_elim_operand")
+ (match_operand 0 "memory_operand"))))
+
+;; Similarly, but for tail calls, in which we cannot allow memory references.
+(define_predicate "sibcall_insn_operand"
+ (ior (match_operand 0 "constant_call_address_operand")
+ (match_operand 0 "register_no_elim_operand")))
+
+;; Match exactly zero.
+(define_predicate "const0_operand"
+ (match_code "const_int,const_double,const_vector")
+{
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+ return op == CONST0_RTX (mode);
+})
+
+;; Match exactly one.
+(define_predicate "const1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx")))
+
+;; Match exactly eight.
+(define_predicate "const8_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 8")))
+
+;; Match 2, 4, or 8. Used for leal multiplicands.
+(define_predicate "const248_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i = INTVAL (op);
+ return i == 2 || i == 4 || i == 8;
+})
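+;; For example, (mult (reg:SI 0) (const_int 4)) can be encoded as the
+;; index*scale part of an effective address, while a multiplier of 3
+;; cannot.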
+
+;; Match 0 or 1.
+(define_predicate "const_0_to_1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx || op == const1_rtx")))
+
+;; Match 0 to 3.
+(define_predicate "const_0_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+
+;; Match 0 to 7.
+(define_predicate "const_0_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+
+;; Match 0 to 15.
+(define_predicate "const_0_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
+;; Match 0 to 31.
+(define_predicate "const_0_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 31)")))
+
+;; Match 0 to 63.
+(define_predicate "const_0_to_63_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 63)")))
+
+;; Match 0 to 255.
+(define_predicate "const_0_to_255_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 255)")))
+
+;; Match (0 to 255) * 8
+(define_predicate "const_0_to_255_mul_8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT val = INTVAL (op);
+ return val <= 255*8 && val % 8 == 0;
+})
+
+;; Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
+;; for shift & compare patterns, as shifting by 0 does not change flags).
+(define_predicate "const_1_to_31_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, 31)")))
+
+;; Return nonzero if OP is CONST_INT >= 1 and <= 63 (a valid operand
+;; for 64bit shift & compare patterns, as shifting by 0 does not change flags).
+(define_predicate "const_1_to_63_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, 63)")))
+
+;; Match 2 or 3.
+(define_predicate "const_2_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 2, 3)")))
+
+;; Match 4 to 5.
+(define_predicate "const_4_to_5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 4, 5)")))
+
+;; Match 4 to 7.
+(define_predicate "const_4_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 4, 7)")))
+
+;; Match 6 to 7.
+(define_predicate "const_6_to_7_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 6, 7)")))
+
+;; Match 8 to 11.
+(define_predicate "const_8_to_11_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 8, 11)")))
+
+;; Match 12 to 15.
+(define_predicate "const_12_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 12, 15)")))
+
+;; Match exactly one bit in 2-bit mask.
+(define_predicate "const_pow2_1_to_2_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 1 || INTVAL (op) == 2")))
+
+;; Match exactly one bit in 4-bit mask.
+(define_predicate "const_pow2_1_to_8_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 3;
+})
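+;; Note that exact_log2 returns -1 for values that are not powers of
+;; two; assigned to the unsigned variable "log" it wraps to UINT_MAX,
+;; so the range test above also rejects non-power-of-two values.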
+
+;; Match exactly one bit in 8-bit mask.
+(define_predicate "const_pow2_1_to_128_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 7;
+})
+
+;; Match exactly one bit in 16-bit mask.
+(define_predicate "const_pow2_1_to_32768_operand"
+ (match_code "const_int")
+{
+ unsigned int log = exact_log2 (INTVAL (op));
+ return log <= 15;
+})
+
+;; True if this is a constant appropriate for an increment or decrement.
+(define_predicate "incdec_operand"
+ (match_code "const_int")
+{
+  /* On Pentium4, the inc and dec operations cause an extra dependency on
+     the flags register, since the carry flag is not set.  */
+ if (!TARGET_USE_INCDEC && !optimize_size)
+ return 0;
+ return op == const1_rtx || op == constm1_rtx;
+})
+
+;; True for registers, or 1 or -1. Used to optimize double-word shifts.
+(define_predicate "reg_or_pm1_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx || op == constm1_rtx"))))
+
+;; True if OP is acceptable as operand of DImode shift expander.
+(define_predicate "shiftdi_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "register_operand")))
+
+(define_predicate "ashldi_input_operand"
+ (if_then_else (match_test "TARGET_64BIT")
+ (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "reg_or_pm1_operand")))
+
+;; Return true if OP is a vector load from the constant pool with just
+;; the first element nonzero.
+(define_predicate "zero_extended_scalar_load_operand"
+ (match_code "mem")
+{
+ unsigned n_elts;
+ op = maybe_get_pool_constant (op);
+ if (!op)
+ return 0;
+ if (GET_CODE (op) != CONST_VECTOR)
+ return 0;
+ n_elts =
+ (GET_MODE_SIZE (GET_MODE (op)) /
+ GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
+ for (n_elts--; n_elts > 0; n_elts--)
+ {
+ rtx elt = CONST_VECTOR_ELT (op, n_elts);
+ if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
+ return 0;
+ }
+ return 1;
+})
+
+;; Return true if the operand is a vector constant that is all ones.
+(define_predicate "vector_all_ones_operand"
+ (match_code "const_vector")
+{
+ int nunits = GET_MODE_NUNITS (mode);
+
+ if (GET_CODE (op) == CONST_VECTOR
+ && CONST_VECTOR_NUNITS (op) == nunits)
+ {
+ int i;
+ for (i = 0; i < nunits; ++i)
+ {
+ rtx x = CONST_VECTOR_ELT (op, i);
+ if (x != constm1_rtx)
+ return 0;
+ }
+ return 1;
+ }
+
+ return 0;
+})
+
+;; Return 1 when OP is an operand acceptable for a standard SSE move.
+(define_predicate "vector_move_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return 1 when OP is a nonimmediate operand or a standard SSE constant.
+(define_predicate "nonimmediate_or_sse_const_operand"
+ (match_operand 0 "general_operand")
+{
+ if (nonimmediate_operand (op, mode))
+ return 1;
+ if (standard_sse_constant_p (op) > 0)
+ return 1;
+ return 0;
+})
+
+;; Return true if OP is a register or a zero.
+(define_predicate "reg_or_0_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "const0_operand")))
+
+;; Return true if op is a valid address that does not contain
+;; a segment override.
+(define_special_predicate "no_seg_address_operand"
+ (match_operand 0 "address_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (op, &parts);
+ gcc_assert (ok);
+ return parts.seg == SEG_DEFAULT;
+})
+
+;; Return nonzero if the rtx is known to be at least 32 bits aligned.
+(define_predicate "aligned_operand"
+ (match_operand 0 "general_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ /* Registers and immediate operands are always "aligned". */
+ if (GET_CODE (op) != MEM)
+ return 1;
+
+  /* All patterns using aligned_operand on memory operands end up
+     promoting the memory operand to 64 bits and thus causing a memory
+     mismatch stall.  */
+ if (TARGET_MEMORY_MISMATCH_STALL && !optimize_size)
+ return 0;
+
+ /* Don't even try to do any aligned optimizations with volatiles. */
+ if (MEM_VOLATILE_P (op))
+ return 0;
+
+ if (MEM_ALIGN (op) >= 32)
+ return 1;
+
+ op = XEXP (op, 0);
+
+ /* Pushes and pops are only valid on the stack pointer. */
+ if (GET_CODE (op) == PRE_DEC
+ || GET_CODE (op) == POST_INC)
+ return 1;
+
+ /* Decode the address. */
+ ok = ix86_decompose_address (op, &parts);
+ gcc_assert (ok);
+
+ /* Look for some component that isn't known to be aligned. */
+ if (parts.index)
+ {
+ if (REGNO_POINTER_ALIGN (REGNO (parts.index)) * parts.scale < 32)
+ return 0;
+ }
+ if (parts.base)
+ {
+ if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
+ return 0;
+ }
+ if (parts.disp)
+ {
+ if (!CONST_INT_P (parts.disp)
+ || (INTVAL (parts.disp) & 3) != 0)
+ return 0;
+ }
+
+ /* Didn't find one -- this must be an aligned address. */
+ return 1;
+})
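+;; For example, a MEM with address (plus (reg) (const_int 8)) is
+;; considered aligned when the base register is known to be 32-bit
+;; aligned, since the displacement 8 is a multiple of 4.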
+
+;; Returns 1 if OP is memory operand with a displacement.
+(define_predicate "memory_displacement_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+ return parts.disp != NULL_RTX;
+})
+
+;; Returns 1 if OP is memory operand with a displacement only.
+(define_predicate "memory_displacement_only_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+
+ if (parts.base || parts.index)
+ return 0;
+
+ return parts.disp != NULL_RTX;
+})
+
+;; Returns 1 if OP is a memory operand that needs at most one register,
+;; not counting the stack pointer or frame pointer.
+(define_predicate "cmpxchg8b_pic_memory_operand"
+ (match_operand 0 "memory_operand")
+{
+ struct ix86_address parts;
+ int ok;
+
+ ok = ix86_decompose_address (XEXP (op, 0), &parts);
+ gcc_assert (ok);
+ if (parts.base == NULL_RTX
+ || parts.base == arg_pointer_rtx
+ || parts.base == frame_pointer_rtx
+ || parts.base == hard_frame_pointer_rtx
+ || parts.base == stack_pointer_rtx)
+ return 1;
+
+ if (parts.index == NULL_RTX
+ || parts.index == arg_pointer_rtx
+ || parts.index == frame_pointer_rtx
+ || parts.index == hard_frame_pointer_rtx
+ || parts.index == stack_pointer_rtx)
+ return 1;
+
+ return 0;
+})
+
+
+;; Returns 1 if OP is a memory operand that cannot be represented
+;; by the modRM array.
+(define_predicate "long_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (match_test "memory_address_length (op) != 0")))
+
+;; Return 1 if OP is a comparison operator that can be issued by fcmov.
+(define_predicate "fcmov_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ if (bypass_code != UNKNOWN || second_code != UNKNOWN)
+ return 0;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+  /* The i387 supports only a limited set of condition codes.  */
+ switch (code)
+ {
+ case LTU: case GTU: case LEU: case GEU:
+ if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode
+ || inmode == CCCmode)
+ return 1;
+ return 0;
+ case ORDERED: case UNORDERED:
+ case EQ: case NE:
+ return 1;
+ default:
+ return 0;
+ }
+})
+
+;; Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns.
+;; The first set are supported directly; the second set can't be done with
+;; full IEEE support, i.e. NaNs.
+;;
+;; ??? It would seem that we have a lot of uses of this predicate that pass
+;; it the wrong mode. We got away with this because the old function didn't
+;; check the mode at all. Mirror that for now by calling this a special
+;; predicate.
+
+(define_special_predicate "sse_comparison_operator"
+ (match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
+
+;; Return 1 if OP is a comparison operator that can be issued by
+;; AVX predicate generation instructions.
+(define_predicate "avx_comparison_float_operator"
+ (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))
+
+;; Return 1 if OP is a comparison operator that can be issued by SSE5
+;; predicate generation instructions.
+(define_predicate "sse5_comparison_float_operator"
+ (and (match_test "TARGET_SSE5")
+ (match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt")))
+
+(define_predicate "ix86_comparison_int_operator"
+ (match_code "ne,eq,ge,gt,le,lt"))
+
+(define_predicate "ix86_comparison_uns_operator"
+ (match_code "ne,eq,geu,gtu,leu,ltu"))
+
+(define_predicate "bt_comparison_operator"
+ (match_code "ne,eq"))
+
+;; Return 1 if OP is a valid comparison operator in valid mode.
+(define_predicate "ix86_comparison_operator"
+ (match_operand 0 "comparison_operator")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ return (bypass_code == UNKNOWN && second_code == UNKNOWN);
+ }
+ switch (code)
+ {
+ case EQ: case NE:
+ return 1;
+ case LT: case GE:
+ if (inmode == CCmode || inmode == CCGCmode
+ || inmode == CCGOCmode || inmode == CCNOmode)
+ return 1;
+ return 0;
+ case LTU: case GTU: case LEU: case GEU:
+ if (inmode == CCmode || inmode == CCCmode)
+ return 1;
+ return 0;
+ case ORDERED: case UNORDERED:
+ if (inmode == CCmode)
+ return 1;
+ return 0;
+ case GT: case LE:
+ if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
+ return 1;
+ return 0;
+ default:
+ return 0;
+ }
+})
+
+;; Return 1 if OP is a valid comparison operator testing carry flag to be set.
+(define_predicate "ix86_carry_flag_operator"
+ (match_code "ltu,lt,unlt,gtu,gt,ungt,le,unle,ge,unge,ltgt,uneq")
+{
+ enum machine_mode inmode = GET_MODE (XEXP (op, 0));
+ enum rtx_code code = GET_CODE (op);
+
+ if (!REG_P (XEXP (op, 0))
+ || REGNO (XEXP (op, 0)) != FLAGS_REG
+ || XEXP (op, 1) != const0_rtx)
+ return 0;
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ if (bypass_code != UNKNOWN || second_code != UNKNOWN)
+ return 0;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+ else if (inmode == CCCmode)
+ return code == LTU || code == GTU;
+ else if (inmode != CCmode)
+ return 0;
+
+ return code == LTU;
+})
+
+;; Nearly general operand, but accept any const_double, since we wish
+;; to be able to drop them into memory rather than have them get pulled
+;; into registers.
+(define_predicate "cmp_fp_expander_operand"
+ (ior (match_code "const_double")
+ (match_operand 0 "general_operand")))
+
+;; Return true if this is a valid binary floating-point operation.
+(define_predicate "binary_fp_operator"
+ (match_code "plus,minus,mult,div"))
+
+;; Return true if this is a multiply operation.
+(define_predicate "mult_operator"
+ (match_code "mult"))
+
+;; Return true if this is a division operation.
+(define_predicate "div_operator"
+ (match_code "div"))
+
+;; Return true if this is a float extend operation.
+(define_predicate "float_operator"
+ (match_code "float"))
+
+;; Return true for ARITHMETIC_P.
+(define_predicate "arith_or_logical_operator"
+ (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax,compare,minus,div,
+ mod,udiv,umod,ashift,rotate,ashiftrt,lshiftrt,rotatert"))
+
+;; Return true for COMMUTATIVE_P.
+(define_predicate "commutative_operator"
+ (match_code "plus,mult,and,ior,xor,smin,smax,umin,umax"))
+
+;; Return 1 if OP is a binary operator that can be promoted to wider mode.
+(define_predicate "promotable_binary_operator"
+ (ior (match_code "plus,and,ior,xor,ashift")
+ (and (match_code "mult")
+ (match_test "TARGET_TUNE_PROMOTE_HIMODE_IMUL"))))
+
+;; To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
+;; re-recognize the operand to avoid a copy_to_mode_reg that will fail.
+;;
+;; ??? It seems likely that this will only work because cmpsi is an
+;; expander, and no actual insns use this.
+
+(define_predicate "cmpsi_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (and (match_code "and")
+ (match_code "zero_extract" "0")
+ (match_code "const_int" "1")
+ (match_code "const_int" "01")
+ (match_code "const_int" "02")
+ (match_test "INTVAL (XEXP (XEXP (op, 0), 1)) == 8")
+	(match_test "INTVAL (XEXP (XEXP (op, 0), 2)) == 8"))))
+
+(define_predicate "compare_operator"
+ (match_code "compare"))
+
+(define_predicate "absneg_operator"
+ (match_code "abs,neg"))
+
+;; Return 1 if OP is a misaligned memory operand.
+(define_predicate "misaligned_operand"
+ (and (match_code "mem")
+ (match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
+
+;; Return 1 if OP is a vzeroall operation, known to be a PARALLEL.
+(define_predicate "vzeroall_operation"
+ (match_code "parallel")
+{
+ int nregs = TARGET_64BIT ? 16 : 8;
+
+ if (XVECLEN (op, 0) != nregs + 1)
+ return 0;
+
+ return 1;
+})
diff --git a/gcc-4.4.0/gcc/config/i386/rtemself.h b/gcc-4.4.0/gcc/config/i386/rtemself.h
new file mode 100644
index 000000000..ac492ec35
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/rtemself.h
@@ -0,0 +1,32 @@
+/* Definitions for rtems targeting an ix86 using ELF.
+ Copyright (C) 1996, 1997, 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("__rtems__"); \
+ builtin_define ("__USE_INIT_FINI__"); \
+ builtin_assert ("system=rtems"); \
+ if (!TARGET_80387) \
+ builtin_define ("_SOFT_FLOAT"); \
+ } \
+ while (0)
diff --git a/gcc-4.4.0/gcc/config/i386/sfp-machine.h b/gcc-4.4.0/gcc/config/i386/sfp-machine.h
new file mode 100644
index 000000000..f2df86965
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sfp-machine.h
@@ -0,0 +1,5 @@
+#ifdef __x86_64__
+#include "config/i386/64/sfp-machine.h"
+#else
+#include "config/i386/32/sfp-machine.h"
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/smmintrin.h b/gcc-4.4.0/gcc/config/i386/smmintrin.h
new file mode 100644
index 000000000..1a2998949
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/smmintrin.h
@@ -0,0 +1,724 @@
+/* Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.0. */
+
+#ifndef _SMMINTRIN_H_INCLUDED
+#define _SMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4_1__
+# error "SSE4.1 instruction set not enabled"
+#else
+
+/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
+ files. */
+#include <tmmintrin.h>
+#include <mmintrin-common.h>
+
+/* SSE4.1 */
+
+/* Integer blend instructions - select data from 2 sources using
+ constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
+ (__v8hi)__Y,
+ __M);
+}
+#else
+#define _mm_blend_epi16(X, Y, M) \
+ ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X), \
+ (__v8hi)(__m128i)(Y), (int)(M)))
+#endif
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
+{
+ return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ (__v16qi)__M);
+}
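+
+/* A minimal usage sketch (the values here are illustrative only): bit i
+   of the constant mask selects word i of the result from the second
+   operand, so the following yields words 0-3 from b and words 4-7
+   from a:
+
+     __m128i a = _mm_set1_epi16 (1);
+     __m128i b = _mm_set1_epi16 (2);
+     __m128i r = _mm_blend_epi16 (a, b, 0x0F);  */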
+
+/* Single precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
+{
+ return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
+ (__v4sf)__Y,
+ __M);
+}
+#else
+#define _mm_blend_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
+#endif
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
+{
+ return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
+ (__v4sf)__Y,
+ (__v4sf)__M);
+}
+
+/* Double precision floating point blend instructions - select data
+ from 2 sources using constant/variable mask. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
+{
+ return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
+ (__v2df)__Y,
+ __M);
+}
+#else
+#define _mm_blend_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
+#endif
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
+{
+ return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
+ (__v2df)__Y,
+ (__v2df)__M);
+}
+
+/* Dot product instructions with mask-defined summing and zeroing parts
+ of result. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
+{
+ return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
+ (__v4sf)__Y,
+ __M);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
+{
+ return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
+ (__v2df)__Y,
+ __M);
+}
+#else
+#define _mm_dp_ps(X, Y, M) \
+ ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(M)))
+
+#define _mm_dp_pd(X, Y, M) \
+ ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(M)))
+#endif
+
+/* Packed integer 64-bit comparison, zeroing or filling with ones the
+   corresponding parts of the result.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Min/max packed integer instructions. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_epu32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication with truncation of upper
+ halves of results. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mullo_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Packed integer 32-bit multiplication of 2 pairs of operands
+ with two 64-bit results. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Insert single precision float into packed single precision array
+ element selected by index N. The bits [7-6] of N define S
+ index, the bits [5-4] define D index, and bits [3-0] define
+ zeroing mask for D. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
+{
+ return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
+ (__v4sf)__S,
+ __N);
+}
+#else
+#define _mm_insert_ps(D, S, N) \
+ ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D), \
+ (__v4sf)(__m128)(S), (int)(N)))
+#endif
+
+/* Helper macro to create the N value for _mm_insert_ps. */
+#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
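+
+/* For example, _MM_MK_INSERTPS_NDX (2, 0, 0) builds an N that copies
+   element 2 of S into element 0 of D without zeroing any result
+   elements.  */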
+
+/* Extract binary representation of single precision float from packed
+ single precision array element of X selected by index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_ps (__m128 __X, const int __N)
+{
+ union { int i; float f; } __tmp;
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
+ return __tmp.i;
+}
+#else
+#define _mm_extract_ps(X, N) \
+ (__extension__ \
+ ({ \
+ union { int i; float f; } __tmp; \
+ __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
+ __tmp.i; \
+ }))
+#endif
+
+/* Extract binary representation of single precision float into
+ D from packed single precision array element of S selected
+ by index N. */
+#define _MM_EXTRACT_FLOAT(D, S, N) \
+ { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
+
+/* Extract specified single precision float element into the lower
+ part of __m128. */
+#define _MM_PICK_OUT_PS(X, N) \
+ _mm_insert_ps (_mm_setzero_ps (), (X), \
+ _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
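+
+/* The 0x0e zeroing mask clears result elements 1-3, so only the
+   selected element survives, in the low slot.  */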
+
+/* Insert integer, S, into packed integer array element of D
+ selected by index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi8 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
+ __S, __N);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi32 (__m128i __D, int __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
+ __S, __N);
+}
+
+#ifdef __x86_64__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
+{
+ return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
+ __S, __N);
+}
+#endif
+#else
+#define _mm_insert_epi8(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#define _mm_insert_epi32(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
+ (int)(S), (int)(N)))
+
+#ifdef __x86_64__
+#define _mm_insert_epi64(D, S, N) \
+ ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D), \
+ (long long)(S), (int)(N)))
+#endif
+#endif
+
+/* Extract integer from packed integer array element of X selected by
+ index N. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi8 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi32 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_epi64 (__m128i __X, const int __N)
+{
+ return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
+}
+#endif
+#else
+#define _mm_extract_epi8(X, N) \
+ ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
+#define _mm_extract_epi32(X, N) \
+ ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
+
+#ifdef __x86_64__
+#define _mm_extract_epi64(X, N) \
+ ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
+#endif
+#endif
+
+/* Return horizontal packed word minimum and its index in bits [15:0]
+ and bits [18:16] respectively. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_minpos_epu16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
+}
+
+/* Packed integer sign-extension. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepi8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
+}
+
+/* Packed integer zero-extension. */
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu32_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu16_epi64 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtepu8_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
+}
+
+/* Pack 8 double words from 2 operands into 8 words of result with
+ unsigned saturation. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_packus_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
+}
+
+/* Sum absolute 8-bit integer difference of adjacent groups of 4
+ byte integers in the first 2 operands. Starting offsets within
+ operands are determined by the 3rd mask operand. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
+ (__v16qi)__Y, __M);
+}
+#else
+#define _mm_mpsadbw_epu8(X, Y, M) \
+ ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#endif
+
+/* Load double quadword using non-temporal aligned hint. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_load_si128 (__m128i *__X)
+{
+ return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
+}
+
+#ifdef __SSE4_2__
+
+/* These macros specify the source data format. */
+#define _SIDD_UBYTE_OPS 0x00
+#define _SIDD_UWORD_OPS 0x01
+#define _SIDD_SBYTE_OPS 0x02
+#define _SIDD_SWORD_OPS 0x03
+
+/* These macros specify the comparison operation. */
+#define _SIDD_CMP_EQUAL_ANY 0x00
+#define _SIDD_CMP_RANGES 0x04
+#define _SIDD_CMP_EQUAL_EACH 0x08
+#define _SIDD_CMP_EQUAL_ORDERED 0x0c
+
+/* These macros specify the polarity.  */
+#define _SIDD_POSITIVE_POLARITY 0x00
+#define _SIDD_NEGATIVE_POLARITY 0x10
+#define _SIDD_MASKED_POSITIVE_POLARITY 0x20
+#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30
+
+/* These macros specify the output selection in _mm_cmpXstri (). */
+#define _SIDD_LEAST_SIGNIFICANT 0x00
+#define _SIDD_MOST_SIGNIFICANT 0x40
+
+/* These macros specify the output selection in _mm_cmpXstrm (). */
+#define _SIDD_BIT_MASK 0x00
+#define _SIDD_UNIT_MASK 0x40
+
+/* Intrinsics for text/string processing. */
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistrm(X, Y, M) \
+ ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistri(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestrm(X, LX, Y, LY, M) \
+ ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X), \
+ (int)(LX), (__v16qi)(__m128i)(Y), \
+ (int)(LY), (int)(M)))
+#define _mm_cmpestri(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#endif
+
+/* Intrinsics for text/string processing and reading values of
+ EFlags. */
+
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
+{
+ return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
+ (__v16qi)__Y,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
+{
+ return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
+ (__v16qi)__Y, __LY,
+ __M);
+}
+#else
+#define _mm_cmpistra(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrc(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistro(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrs(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+#define _mm_cmpistrz(X, Y, M) \
+ ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X), \
+ (__v16qi)(__m128i)(Y), (int)(M)))
+
+#define _mm_cmpestra(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrc(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestro(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrs(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#define _mm_cmpestrz(X, LX, Y, LY, M) \
+ ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
+ (__v16qi)(__m128i)(Y), (int)(LY), \
+ (int)(M)))
+#endif
+
+/* Packed integer 64-bit comparison, zeroing or filling with ones the
+   corresponding parts of the result.  */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Calculate the number of bits set to 1.  */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u32 (unsigned int __X)
+{
+ return __builtin_popcount (__X);
+}
+
+#ifdef __x86_64__
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_u64 (unsigned long long __X)
+{
+ return __builtin_popcountll (__X);
+}
+#endif
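+
+/* For example, _mm_popcnt_u32 (0xF0) returns 4, since four bits
+   are set.  */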
+
+/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u8 (unsigned int __C, unsigned char __V)
+{
+ return __builtin_ia32_crc32qi (__C, __V);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u16 (unsigned int __C, unsigned short __V)
+{
+ return __builtin_ia32_crc32hi (__C, __V);
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u32 (unsigned int __C, unsigned int __V)
+{
+ return __builtin_ia32_crc32si (__C, __V);
+}
+
+#ifdef __x86_64__
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
+{
+ return __builtin_ia32_crc32di (__C, __V);
+}
+#endif
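+
+/* A minimal usage sketch (buf and len are hypothetical, not part of
+   this header): accumulate a CRC32C value over a byte buffer, starting
+   from a seed of 0:
+
+     unsigned int crc = 0;
+     for (size_t i = 0; i < len; i++)
+       crc = _mm_crc32_u8 (crc, buf[i]);  */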
+
+#endif /* __SSE4_2__ */
+
+#endif /* __SSE4_1__ */
+
+#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/sol2-10.h b/gcc-4.4.0/gcc/config/i386/sol2-10.h
new file mode 100644
index 000000000..6e7f13d8f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sol2-10.h
@@ -0,0 +1,124 @@
+/* Solaris 10 configuration.
+ Copyright (C) 2004, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "/"
+
+/* binutils' GNU as understands --32 and --64, but the native Solaris
+ assembler requires -xarch=generic or -xarch=generic64 instead. */
+#undef ASM_SPEC
+#ifdef USE_GAS
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} " \
+ "%{Wa,*:%*} %{m32:--32} %{m64:--64} -s %(asm_cpu)"
+#else
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} " \
+ "%{Wa,*:%*} %{m32:-xarch=generic} %{m64:-xarch=generic64} " \
+ "-s %(asm_cpu)"
+#endif
+
+/* The native Solaris assembler can't calculate the difference between
+ symbols in different sections, which causes problems for -fPIC jump
+ tables in .rodata. */
+#ifndef HAVE_AS_IX86_DIFF_SECT_DELTA
+#undef JUMP_TABLES_IN_TEXT_SECTION
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+/* The native Solaris assembler cannot handle the SYMBOL-. syntax, but
+ requires SYMBOL@rel/@rel64 instead. */
+#define ASM_OUTPUT_DWARF_PCREL(FILE, SIZE, LABEL) \
+ do { \
+ fputs (integer_asm_op (SIZE, FALSE), FILE); \
+ assemble_name (FILE, LABEL); \
+ fputs (SIZE == 8 ? "@rel64" : "@rel", FILE); \
+ } while (0)
+#endif
+
+#undef NO_PROFILE_COUNTERS
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "_mcount"
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE (TARGET_64BIT ? "int" : "long int")
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef WINT_TYPE
+#define WINT_TYPE (TARGET_64BIT ? "int" : "long int")
+#undef WINT_TYPE_SIZE
+#define WINT_TYPE_SIZE 32
+
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ if (flag_omit_frame_pointer == 2) \
+ flag_omit_frame_pointer = 0; \
+ } \
+ while (0)
+
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_IEEE_FP \
+ | MASK_FLOAT_RETURNS)
+
+#define SUBTARGET_OPTIMIZATION_OPTIONS \
+ do \
+ { \
+ if (optimize >= 1) \
+ target_flags |= MASK_OMIT_LEAF_FRAME_POINTER; \
+ } \
+ while (0)
+
+#define MULTILIB_DEFAULTS { "m32" }
+
+#undef LINK_ARCH64_SPEC_BASE
+#define LINK_ARCH64_SPEC_BASE \
+ "%{G:-G} \
+ %{YP,*} \
+ %{R*} \
+ %{compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,/usr/ucblib/64:/usr/lib/libp/64:/lib/64:/usr/lib/64} \
+ %{!p:%{!pg:-Y P,/usr/ucblib/64:/lib:/usr/lib/64}}} \
+ -R /usr/ucblib/64} \
+ %{!compat-bsd: \
+ %{!YP,*:%{p|pg:-Y P,/usr/lib/libp/64:/lib/64:/usr/lib/64} \
+ %{!p:%{!pg:-Y P,/lib/64:/usr/lib/64}}}}"
+
+#undef LINK_ARCH64_SPEC
+#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE
+
+#ifdef TARGET_GNU_LD
+#define TARGET_LD_EMULATION "%{m64:-m elf_x86_64}%{!m64:-m elf_i386} "
+#else
+#define TARGET_LD_EMULATION ""
+#endif
+
+#undef LINK_ARCH_SPEC
+#define LINK_ARCH_SPEC TARGET_LD_EMULATION \
+ "%{m64:" LINK_ARCH64_SPEC "}%{!m64:" LINK_ARCH32_SPEC "}"
+
+/* We do not need to search a special directory for startup files. */
+#undef MD_STARTFILE_PREFIX
+
+#undef TARGET_ASM_NAMED_SECTION
+#define TARGET_ASM_NAMED_SECTION i386_solaris_elf_named_section
+
+#undef SUBTARGET_RETURN_IN_MEMORY
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ ix86_sol10_return_in_memory (TYPE, FNTYPE)
diff --git a/gcc-4.4.0/gcc/config/i386/sol2-c1.asm b/gcc-4.4.0/gcc/config/i386/sol2-c1.asm
new file mode 100644
index 000000000..4a89530cc
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sol2-c1.asm
@@ -0,0 +1,151 @@
+! crt1.s for Solaris 2, x86
+
+! Copyright (C) 1993, 1998, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file takes control of the process from the kernel, as specified
+! in section 3 of the System V Application Binary Interface, Intel386
+! Processor Supplement. It has been constructed from information obtained
+! from the ABI, information obtained from single stepping existing
+! Solaris executables through their startup code with gdb, and from
+! information obtained by single stepping executables on other i386 SVR4
+! implementations. This file is the first thing linked into any executable.
+
+ .ident "GNU C crt1.s"
+ .weak _cleanup
+ .weak _DYNAMIC
+ .text
+
+! Start creating the initial frame by pushing a NULL value for the return
+! address of the initial frame, and marking the end of the stack frame chain
+! (the innermost stack frame) with a NULL value, per page 3-32 of the ABI.
+! Initialize the first stack frame pointer in %ebp (the contents of which
+! are unspecified at process initialization).
+
+ .globl _start
+_start:
+ pushl $0x0
+ pushl $0x0
+ movl %esp,%ebp
+
+! As specified on page 3-32 of the ABI, %edx contains a function
+! pointer that should be registered with atexit(), for proper
+! shared object termination. Just push it onto the stack for now
+! to preserve it. We want to register _cleanup() first.
+
+ pushl %edx
+
+! Check to see if there is a _cleanup() function linked in, and if
+! so, register it with atexit() as the last thing to be run by
+! atexit().
+
+ movl $_cleanup,%eax
+ testl %eax,%eax
+ je .L1
+ pushl $_cleanup
+ call atexit
+ addl $0x4,%esp
+.L1:
+
+! Now check to see if we have a _DYNAMIC table, and if so then
+! we need to register the function pointer previously in %edx, but
+! now conveniently saved on the stack as the argument to pass to
+! atexit().
+
+ movl $_DYNAMIC,%eax
+ testl %eax,%eax
+ je .L2
+ call atexit
+.L2:
+
+! Register _fini() with atexit(). We will take care of calling _init()
+! directly.
+
+ pushl $_fini
+ call atexit
+
+! Compute the address of the environment vector on the stack and load
+! it into the global variable _environ. Currently argc is at 8 off
+! the frame pointer. Fetch the argument count into %eax, scale by the
+! size of each arg (4 bytes) and compute the address of the environment
+! vector which is 16 bytes (the two zero words we pushed, plus argc,
+! plus the null word terminating the arg vector) further up the stack,
+! off the frame pointer (whew!).
+
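+! (Illustrative sketch, not in the original.  With 4-byte words, the
+! stack at this point is:
+!     0(%ebp)            zero word ending the frame chain
+!     4(%ebp)            zero word standing in for a return address
+!     8(%ebp)            argc
+!     12(%ebp)           argv[0], then argc-1 more argv entries
+!     12+4*argc(%ebp)    NULL terminating argv[]
+!     16+4*argc(%ebp)    environ[0], the address computed below.)
+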
+ movl 8(%ebp),%eax
+ leal 16(%ebp,%eax,4),%edx
+ movl %edx,_environ
+
+! Push the environment vector pointer, the argument vector pointer,
+! and the argument count onto the stack to set up the arguments
+! for _init(), _fpstart(), and main(). Note that the environment
+! vector pointer and the arg count were previously loaded into
+! %edx and %eax respectively. The only new value we need to compute
+! is the argument vector pointer, which is at a fixed address off
+! the initial frame pointer.
+
+!
+! Make sure the stack is properly aligned.
+!
+ andl $0xfffffff0,%esp
+ subl $4,%esp
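+
+! (Not in the original: after the andl, %esp is 16-byte aligned; the
+! subl $4 plus the three 4-byte pushes below subtract 16 in total, so
+! %esp is 16-byte aligned again at each of the calls that follow.)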
+
+ pushl %edx
+ leal 12(%ebp),%edx
+ pushl %edx
+ pushl %eax
+
+! Call _init(argc, argv, environ), _fpstart(argc, argv, environ), and
+! main(argc, argv, environ).
+
+ call _init
+ call __fpstart
+ call main
+
+! Pop the argc, argv, and environ arguments off the stack, push the
+! value returned from main(), and call exit().
+
+ addl $12,%esp
+ pushl %eax
+ call exit
+
+! An inline equivalent of _exit, as specified in Figure 3-26 of the ABI.
+
+ pushl $0x0
+ movl $0x1,%eax
+ lcall $7,$0
+
+! If all else fails, just try a halt!
+
+ hlt
+ .type _start,@function
+ .size _start,.-_start
+
+! A dummy profiling support routine for non-profiling executables,
+! in case we link in some objects that have been compiled for profiling.
+
+ .weak _mcount
+_mcount:
+ ret
+ .type _mcount,@function
+ .size _mcount,.-_mcount
diff --git a/gcc-4.4.0/gcc/config/i386/sol2-ci.asm b/gcc-4.4.0/gcc/config/i386/sol2-ci.asm
new file mode 100644
index 000000000..f2ff2025d
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sol2-ci.asm
@@ -0,0 +1,40 @@
+! crti.s for Solaris 2, x86.
+
+! Copyright (C) 1993, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file just supplies labeled starting points for the .init and .fini
+! sections. It is linked in before the values-Xx.o files and also before
+! crtbegin.o.
+
+ .ident "GNU C crti.s"
+
+ .section .init
+ .globl _init
+ .type _init,@function
+_init:
+
+ .section .fini
+ .globl _fini
+ .type _fini,@function
+_fini:
diff --git a/gcc-4.4.0/gcc/config/i386/sol2-cn.asm b/gcc-4.4.0/gcc/config/i386/sol2-cn.asm
new file mode 100644
index 000000000..217f04091
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sol2-cn.asm
@@ -0,0 +1,35 @@
+! crtn.s for Solaris 2, x86.
+
+! Copyright (C) 1993, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file just supplies returns for the .init and .fini sections. It is
+! linked in after all other files.
+
+ .ident "GNU C crtn.s"
+
+ .section .init
+ ret $0x0
+
+ .section .fini
+ ret $0x0
diff --git a/gcc-4.4.0/gcc/config/i386/sol2-gc1.asm b/gcc-4.4.0/gcc/config/i386/sol2-gc1.asm
new file mode 100644
index 000000000..8cb989a9c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sol2-gc1.asm
@@ -0,0 +1,155 @@
+! gcrt1.s for Solaris 2, x86
+
+! Copyright (C) 1993, 2008, 2009 Free Software Foundation, Inc.
+! Written By Fred Fish, Nov 1992
+!
+! This file is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by the
+! Free Software Foundation; either version 3, or (at your option) any
+! later version.
+!
+! This file is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+! General Public License for more details.
+!
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+!
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+
+! This file takes control of the process from the kernel, as specified
+! in section 3 of the System V Application Binary Interface, Intel386
+! Processor Supplement. It has been constructed from information obtained
+! from the ABI, information obtained from single stepping existing
+! Solaris executables through their startup code with gdb, and from
+! information obtained by single stepping executables on other i386 SVR4
+! implementations. This file is the first thing linked into any executable.
+
+! This is a crt1.s modified by J.W.Hawtin <oolon@ankh.org>, 15/8/96, to
+! allow program profiling by calling monstartup on entry and _mcleanup
+! on exit.
+
+ .ident "GNU C gcrt1.s"
+ .weak _DYNAMIC
+ .text
+
+! Start creating the initial frame by pushing a NULL value for the return
+! address of the initial frame, and marking the end of the stack frame chain
+! (the innermost stack frame) with a NULL value, per page 3-32 of the ABI.
+! Initialize the first stack frame pointer in %ebp (the contents of which
+! are unspecified at process initialization).
+
+ .globl _start
+_start:
+ pushl $0x0
+ pushl $0x0
+ movl %esp,%ebp
+
+! As specified on page 3-32 of the ABI, %edx contains a function
+! pointer that should be registered with atexit(), for proper
+! shared object termination. Just push it onto the stack for now
+! to preserve it. We want to register _cleanup() first.
+
+ pushl %edx
+
+! Check to see if there is a _mcleanup() function linked in, and if
+! so, register it with atexit() as the last thing to be run by
+! atexit().
+
+ movl $_mcleanup,%eax
+ testl %eax,%eax
+ je .L1
+ pushl $_mcleanup
+ call atexit
+ addl $0x4,%esp
+.L1:
+
+! Now check to see if we have a _DYNAMIC table, and if so then
+! we need to register the function pointer previously in %edx, but
+! now conveniently saved on the stack as the argument to pass to
+! atexit().
+
+ movl $_DYNAMIC,%eax
+ testl %eax,%eax
+ je .L2
+ call atexit
+.L2:
+
+! Register _fini() with atexit(). We will take care of calling _init()
+! directly.
+
+ pushl $_fini
+ call atexit
+
+! Start profiling
+
+ pushl %ebp
+ movl %esp,%ebp
+ pushl $_etext
+ pushl $_start
+ call monstartup
+ addl $8,%esp
+ popl %ebp
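+
+! (Not in the original: the sequence above calls monstartup(_start, _etext),
+! i.e. profiling covers the whole text segment from _start up to _etext.)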
+
+! Compute the address of the environment vector on the stack and load
+! it into the global variable _environ. Currently argc is at 8 off
+! the frame pointer. Fetch the argument count into %eax, scale by the
+! size of each arg (4 bytes) and compute the address of the environment
+! vector which is 16 bytes (the two zero words we pushed, plus argc,
+! plus the null word terminating the arg vector) further up the stack,
+! off the frame pointer (whew!).
+
+ movl 8(%ebp),%eax
+ leal 16(%ebp,%eax,4),%edx
+ movl %edx,_environ
+
+! Push the environment vector pointer, the argument vector pointer,
+! and the argument count onto the stack to set up the arguments
+! for _init(), _fpstart(), and main(). Note that the environment
+! vector pointer and the arg count were previously loaded into
+! %edx and %eax respectively. The only new value we need to compute
+! is the argument vector pointer, which is at a fixed address off
+! the initial frame pointer.
+
+!
+! Make sure the stack is properly aligned.
+!
+ andl $0xfffffff0,%esp
+ subl $4,%esp
+
+ pushl %edx
+ leal 12(%ebp),%edx
+ pushl %edx
+ pushl %eax
+
+! Call _init(argc, argv, environ), _fpstart(argc, argv, environ), and
+! main(argc, argv, environ).
+
+ call _init
+ call __fpstart
+ call main
+
+! Pop the argc, argv, and environ arguments off the stack, push the
+! value returned from main(), and call exit().
+
+ addl $12,%esp
+ pushl %eax
+ call exit
+
+! An inline equivalent of _exit, as specified in Figure 3-26 of the ABI.
+
+ pushl $0x0
+ movl $0x1,%eax
+ lcall $7,$0
+
+! If all else fails, just try a halt!
+
+ hlt
+ .type _start,@function
+ .size _start,.-_start
diff --git a/gcc-4.4.0/gcc/config/i386/sol2.h b/gcc-4.4.0/gcc/config/i386/sol2.h
new file mode 100644
index 000000000..1d21cd9c0
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sol2.h
@@ -0,0 +1,114 @@
+/* Target definitions for GCC for Intel 80386 running Solaris 2
+ Copyright (C) 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+ 2004, 2007, 2008 Free Software Foundation, Inc.
+ Contributed by Fred Fish (fnf@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The Solaris 2.0 x86 linker botches alignment of code sections.
+ It tries to align to a 16 byte boundary by padding with 0x00000090
+ ints, rather than 0x90 bytes (nop). This generates trash in the
+ ".init" section since the contribution from crtbegin.o is only 7
+ bytes. The linker pads it to 16 bytes with a single 0x90 byte, and
+ two 0x00000090 ints, which generates a segmentation violation when
+ executed. This macro forces the assembler to do the padding, since
+ it knows what it is doing. */
+#define FORCE_CODE_SECTION_ALIGN asm(ALIGN_ASM_OP "16");
+
+/* Old versions of the Solaris assembler cannot handle the difference of
+   labels in different sections, so force DW_EH_PE_datarel.  */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \
+ (flag_pic ? ((GLOBAL ? DW_EH_PE_indirect : 0) \
+ | (TARGET_64BIT ? DW_EH_PE_pcrel | DW_EH_PE_sdata4 \
+ : DW_EH_PE_datarel)) \
+ : DW_EH_PE_absptr)
+
+/* The Solaris linker will not merge a read-only .eh_frame section
+ with a read-write .eh_frame section. None of the encodings used
+ with non-PIC code require runtime relocations. In 64-bit mode,
+ since there is no backwards compatibility issue, we use a read-only
+ section for .eh_frame. In 32-bit mode, we use a writable .eh_frame
+ section in order to be compatible with G++ for Solaris x86. */
+#undef EH_TABLES_CAN_BE_READ_ONLY
+#define EH_TABLES_CAN_BE_READ_ONLY (TARGET_64BIT)
+
+/* Solaris 2/Intel as chokes on #line directives. */
+#undef CPP_SPEC
+#define CPP_SPEC "%{,assembler-with-cpp:-P} %(cpp_subtarget)"
+
+/* FIXME: -K PIC was removed from the generic Solaris 2 ASM_SPEC: with it,
+   the native assembler emits many warnings: R_386_32 relocation is used
+   for symbol ".text".  */
+#undef ASM_SPEC
+#define ASM_SPEC "\
+%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Wa,*:%*} -s \
+%(asm_cpu) \
+"
+
+#define ASM_CPU_SPEC ""
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "cpp_subtarget", CPP_SUBTARGET_SPEC }, \
+ { "asm_cpu", ASM_CPU_SPEC }, \
+ { "startfile_arch", STARTFILE_ARCH_SPEC }, \
+ { "link_arch", LINK_ARCH_SPEC }
+
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* The 32-bit Solaris assembler does not support .quad. Do not use it. */
+#ifndef TARGET_BI_ARCH
+#undef ASM_QUAD
+#endif
+
+/* The Solaris assembler wants a .local for non-exported aliases. */
+#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \
+ do { \
+ const char *declname = \
+ IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \
+ ASM_OUTPUT_DEF ((FILE), declname, \
+ IDENTIFIER_POINTER (TARGET)); \
+ if (! TREE_PUBLIC (DECL)) \
+ { \
+ fprintf ((FILE), "%s", LOCAL_ASM_OP); \
+ assemble_name ((FILE), declname); \
+ fprintf ((FILE), "\n"); \
+ } \
+ } while (0)
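+
+/* Illustrative note, not in the original: for a file-local alias "bar"
+   of "foo" this emits something like ".set bar,foo" (via ASM_OUTPUT_DEF)
+   followed by ".local bar", which is the spelling the native assembler
+   expects for a non-exported alias.  */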
+
+/* Solaris-specific #pragmas are implemented on top of attributes. Hook in
+ the bits from config/sol2.c. */
+#define SUBTARGET_INSERT_ATTRIBUTES solaris_insert_attributes
+#define SUBTARGET_ATTRIBUTE_TABLE SOLARIS_ATTRIBUTE_TABLE
+
+/* Register the Solaris-specific #pragma directives. */
+#define REGISTER_SUBTARGET_PRAGMAS() solaris_register_pragmas ()
+
+/* Output a simple call for .init/.fini. */
+#define ASM_OUTPUT_CALL(FILE, FN) \
+ do \
+ { \
+ fprintf (FILE, "\tcall\t"); \
+ print_operand (FILE, XEXP (DECL_RTL (FN), 0), 'P'); \
+ fprintf (FILE, "\n"); \
+ } \
+ while (0)
+
+/* We do not need NT_VERSION notes. */
+#undef X86_FILE_START_VERSION_DIRECTIVE
+#define X86_FILE_START_VERSION_DIRECTIVE false
diff --git a/gcc-4.4.0/gcc/config/i386/sse.md b/gcc-4.4.0/gcc/config/i386/sse.md
new file mode 100644
index 000000000..e6d1fd14b
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sse.md
@@ -0,0 +1,11980 @@
+;; GCC machine description for SSE instructions
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; 16-byte integral modes handled by SSE, minus TImode, which gets
+;; special-cased for TARGET_64BIT.
+(define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
+
+;; All 16-byte vector modes handled by SSE
+(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; 32-byte integral vector modes handled by AVX
+(define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
+
+;; All 32-byte vector modes handled by AVX
+(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; All QI vector modes handled by AVX
+(define_mode_iterator AVXMODEQI [V32QI V16QI])
+
+;; All DI vector modes handled by AVX
+(define_mode_iterator AVXMODEDI [V4DI V2DI])
+
+;; All vector modes handled by AVX
+(define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
+
+;; Mix-n-match
+(define_mode_iterator SSEMODE12 [V16QI V8HI])
+(define_mode_iterator SSEMODE24 [V8HI V4SI])
+(define_mode_iterator SSEMODE14 [V16QI V4SI])
+(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
+(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
+(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
+(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
+(define_mode_iterator SSEMODEF2P [V4SF V2DF])
+
+(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
+(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
+(define_mode_iterator AVX256MODE4P [V4DI V4DF])
+(define_mode_iterator AVX256MODE8P [V8SI V8SF])
+(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
+(define_mode_iterator AVXMODEF4P [V4SF V4DF])
+(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
+(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
+
+;; Int-float size matches
+(define_mode_iterator SSEMODE4S [V4SF V4SI])
+(define_mode_iterator SSEMODE2D [V2DF V2DI])
+
+;; Mapping from float mode to required SSE level
+(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
+
+;; Mapping from integer vector mode to mnemonic suffix
+(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
+
+;; Mapping of the sse5 suffix
+(define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
+ (V4SF "ps") (V2DF "pd")])
+(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
+ (V4SF "ss") (V2DF "sd")])
+(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
+
+;; Mapping of the max integer size for sse5 rotate immediate constraint
+(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
+
+;; Mapping of vector modes back to the scalar modes
+(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
+ (V16QI "QI") (V8HI "HI")
+ (V4SI "SI") (V2DI "DI")])
+
+;; Mapping of vector modes to a vector mode of double size
+(define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI")
+ (V4SF "V8SF") (V4SI "V8SI")])
+
+;; Number of scalar elements in each vector type
+(define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2")
+ (V16QI "16") (V8HI "8")
+ (V4SI "4") (V2DI "2")])
+
+;; Mapping for AVX
+(define_mode_attr avxvecmode
+ [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF")
+ (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
+ (V8SF "V8SF") (V4DF "V4DF")])
+(define_mode_attr avxvecpsmode
+ [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
+ (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
+(define_mode_attr avxhalfvecmode
+ [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
+ (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
+(define_mode_attr avxscalarmode
+ [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF")
+ (V8SF "SF") (V4DF "DF")])
+(define_mode_attr avxcvtvecmode
+ [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
+(define_mode_attr avxpermvecmode
+ [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
+(define_mode_attr avxmodesuffixf2c
+ [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")])
+(define_mode_attr avxmodesuffixp
+ [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
+ (V4DF "pd")])
+(define_mode_attr avxmodesuffixs
+ [(V16QI "b") (V8HI "w") (V4SI "d")])
+(define_mode_attr avxmodesuffix
+ [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
+ (V8SI "256") (V8SF "256") (V4DF "256")])
+
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits
+ [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
+
+;; Mapping of immediate bits for vpermil instructions
+(define_mode_attr vpermilbits
+ [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
+
+;; Mapping of immediate bits for pinsr instructions
+(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
+
+;; Patterns whose names begin with "sse{,2,3}_" are invoked by intrinsics.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Move patterns
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "mov<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
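+
+;; Illustrative note, not in the original: a mode iterator makes one
+;; pattern stand for several.  The expander above, keyed off AVX256MODE,
+;; behaves as if six expanders -- movv32qi, movv16hi, movv8si, movv4di,
+;; movv8sf and movv4df -- had been written out, with <MODE>mode resolving
+;; to the corresponding machine mode in each copy.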
+
+(define_insn "*avx_mov<mode>_internal"
+ [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_AVX
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ case MODE_V4SF:
+ return "vmovaps\t{%1, %0|%0, %1}";
+ case MODE_V4DF:
+ case MODE_V2DF:
+ return "vmovapd\t{%1, %0|%0, %1}";
+ default:
+ return "vmovdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+;; All of these patterns are enabled for SSE1 as well as SSE2.
+;; This is essential for maintaining stable calling conventions.
+
+(define_expand "mov<mode>"
+ [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m")
+ (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
+ "TARGET_SSE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V4SF:
+ return "movaps\t{%1, %0|%0, %1}";
+ case MODE_V2DF:
+ return "movapd\t{%1, %0|%0, %1}";
+ default:
+ return "movdqa\t{%1, %0|%0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog1,ssemov,ssemov")
+ (set (attr "mode")
+ (cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
+ (const_string "V4SF")
+ (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
+ (const_string "V4SF")
+ (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
+ (const_string "V2DF")
+ ]
+ (const_string "TI")))])
+
+;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
+;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
+;; from memory, we'd prefer to load the memory directly into the %xmm
+;; register. To facilitate this happy circumstance, this pattern won't
+;; split until after register allocation. If the 64-bit value didn't
+;; come from memory, this is the best we can do. This is much better
+;; than storing %edx:%eax into a stack temporary and loading an %xmm
+;; from there.
+
+(define_insn_and_split "movdi_to_sse"
+ [(parallel
+ [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
+ (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
+ (clobber (match_scratch:V4SI 2 "=&x,X"))])]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (register_operand (operands[1], DImode))
+ {
+ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+ Assemble the 64-bit DImode value in an xmm register. */
+ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
+ }
+ else if (memory_operand (operands[1], DImode))
+ emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
+ else
+ gcc_unreachable ();
+})
+
+(define_split
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE && reload_completed"
+ [(set (match_dup 0)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF (match_dup 1))
+ (match_dup 2)
+ (const_int 1)))]
+{
+ operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
+ operands[2] = CONST0_RTX (V4SFmode);
+})
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
+{
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
+ operands[2] = CONST0_RTX (DFmode);
+})
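+
+;; Note -- an interpretation, not from the original: the two splits above
+;; recognize a constant-pool vector whose upper elements are zero and
+;; rewrite the load in the scalar-merge form, so it can be done as a
+;; movss/movsd of just the first element.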
+
+(define_expand "push<mode>1"
+ [(match_operand:AVX256MODE 0 "register_operand" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "push<mode>1"
+ [(match_operand:SSEMODE 0 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_push (<MODE>mode, operands[0]);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+ (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_move_misalign (<MODE>mode, operands);
+ DONE;
+})
+
+(define_insn "avx_movup<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse2_movq128"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_concat:V2DI
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (const_int 0)))]
+ "TARGET_SSE2"
+ "%vmovq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "<sse>_movup<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_movdqu<avxmodesuffix>"
+ [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
+ (unspec:AVXMODEQI
+ [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "vmovdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_movdqu"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
+ UNSPEC_MOVU))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "movdqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx_movnt<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovntp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_movnt<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "movntp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_movnt<mode>"
+ [(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
+ (unspec:AVXMODEDI
+ [(match_operand:AVXMODEDI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_AVX"
+ "vmovntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_movntv2di"
+ [(set (match_operand:V2DI 0 "memory_operand" "=m")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movntdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_movntsi"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "movnti\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "avx_lddqu<avxmodesuffix>"
+ [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
+ (unspec:AVXMODEQI
+ [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
+ UNSPEC_LDDQU))]
+ "TARGET_AVX"
+ "vlddqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse3_lddqu"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
+ UNSPEC_LDDQU))]
+ "TARGET_SSE3"
+ "lddqu\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+; Expand patterns for non-temporal stores. At the moment, only those
+; that directly map to insns are defined; it would be possible to
+; define patterns for other modes that would expand to several insns.
+
+(define_expand "storent<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "")
+
+(define_expand "storent<mode>"
+ [(set (match_operand:MODEF 0 "memory_operand" "")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "")
+
+(define_expand "storentv2di"
+ [(set (match_operand:V2DI 0 "memory_operand" "")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "")
+
+(define_expand "storentsi"
+ [(set (match_operand:SI 0 "memory_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE2"
+ "")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "<code><mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (absneg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plusminus:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (plusminus:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plusminus_mnemonic>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plusminus_mnemonic>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vm<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vm<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (plusminus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (mult:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
+
+(define_insn "*avx_mul<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (mult:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "vmulp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "mul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
+
+(define_insn "*mul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "mulp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vmmul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmmul<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "muls<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemul")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "divv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (div:V8SF (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
+
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V8SFmode);
+ DONE;
+ }
+})
+
+(define_expand "divv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (div:V4DF (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+ "ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
+
+(define_insn "avx_div<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (div:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "divv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (div:V4SF (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swdivsf (operands[0], operands[1],
+ operands[2], V4SFmode);
+ DONE;
+ }
+})
+
+(define_expand "divv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (div:V2DF (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "")
+
+(define_insn "*avx_div<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_div<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "divp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vmdiv<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vdivs<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmdiv<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (div:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "divs<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssediv")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx_rcpv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_AVX"
+ "vrcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "sse_rcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
+ "TARGET_SSE"
+ "%vrcpps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrcpss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_vmrcpv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RCP)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "rcpss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+(define_expand "sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
+ "TARGET_AVX"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "avx_sqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
+ && flag_finite_math_only && !flag_trapping_math
+ && flag_unsafe_math_optimizations)
+ {
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
+ DONE;
+ }
+})
+
+(define_insn "sse_sqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE"
+ "%vsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sqrtv4df2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vsqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "sqrtv2df2"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "%vsqrtpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*avx_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (const_int 1)))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vsqrts<ssemodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vmsqrt<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (sqrt:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
+ (match_operand:SSEMODEF2P 2 "register_operand" "0")
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_AVX && TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
+ DONE;
+})
+
+(define_insn "avx_rsqrtv8sf2"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (unspec:V8SF
+ [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_AVX"
+ "vrsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "rsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
+ "TARGET_SSE_MATH"
+{
+ ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
+ DONE;
+})
+
+(define_insn "sse_rsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
+ "TARGET_SSE"
+ "%vrsqrtps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_vmrsqrtv4sf2"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_RSQRT)
+ (match_operand:V4SF 2 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "rsqrtss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "SF")])
+
+;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
+;; isn't really correct, as those rtl operators aren't defined when
+;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (smaxmin:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+})
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (!flag_finite_math_only)
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+})
+
+(define_insn "*avx_<code><mode>3_finite"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<code><mode>3_finite"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (smaxmin:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_vm<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "AVX128_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_vm<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (smaxmin:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*avx_ieee_smin<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vminp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*avx_ieee_smax<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmaxp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "*ieee_smin<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "minp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*ieee_smax<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "maxp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_addsubv8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_merge:V8SF
+ (plus:V8SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (minus:V8SF (match_dup 1) (match_dup 2))
+ (const_int 170)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_addsubv4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_merge:V4DF
+ (plus:V4DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (minus:V4DF (match_dup 1) (match_dup 2))
+ (const_int 10)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_addsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (minus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 10)))]
+ "TARGET_AVX"
+ "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_addsubv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (plus:V4SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (minus:V4SF (match_dup 1) (match_dup 2))
+ (const_int 10)))]
+ "TARGET_SSE3"
+ "addsubps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 2)))]
+ "TARGET_AVX"
+ "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_addsubv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (plus:V2DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (minus:V2DF (match_dup 1) (match_dup 2))
+ (const_int 2)))]
+ "TARGET_SSE3"
+ "addsubpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "avx_h<plusminus_insn>v4df3"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_concat:V4DF
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "avx_h<plusminus_insn>v8sf3"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_concat:V8SF
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_h<plusminus_insn>v4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse3_h<plusminus_insn>v4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SF
+ (plusminus:SF
+ (vec_select:SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
+ (plusminus:SF
+ (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSE3"
+ "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_h<plusminus_insn>v2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_AVX"
+ "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse3_h<plusminus_insn>v2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
+ (plusminus:DF
+ (vec_select:DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSE3"
+ "h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_expand "reduc_splus_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ if (TARGET_SSE3)
+ {
+ rtx tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
+ }
+ else
+ ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
+ DONE;
+})
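+
+;; For x = {a, b, c, d}, haddps x, x yields {a+b, c+d, a+b, c+d}, and a
+;; second haddps leaves a+b+c+d in every element, so the reduction result
+;; can be read from element 0 of operand 0.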
+
+(define_expand "reduc_splus_v2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")]
+ "TARGET_SSE3"
+{
+ emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+ DONE;
+})
+
+(define_expand "reduc_smax_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+(define_expand "reduc_smin_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_cmpp<avxmodesuffixf2c><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX"
+ "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; We don't promote 128-bit vector compare intrinsics, but the vectorizer
+;; may generate 256-bit vector compare instructions.
+(define_insn "*avx_maskcmp<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "<sse>_maskcmp<mode>3"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
+ (match_operator:SSEMODEF4 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF4 1 "register_operand" "0")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
+ "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))
+ && !TARGET_SSE5"
+ "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_vmmaskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
+ [(match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
+ (match_dup 1)
+ (const_int 1)))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+ "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "<sse>_comi"
+ [(set (reg:CCFP FLAGS_REG)
+ (compare:CCFP
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_ucomi"
+ [(set (reg:CCFPU FLAGS_REG)
+ (compare:CCFPU
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 0 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (if_then_else:SSEMODEF2P
+ (match_operator 3 ""
+ [(match_operand:SSEMODEF2P 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODEF2P 1 "general_operand" "")
+ (match_operand:SSEMODEF2P 2 "general_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+{
+ if (ix86_expand_fp_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
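+
+;; i.e. element-wise (an illustrative sketch of the usual lowering):
+;;   r[i] = cmp (op4[i], op5[i]) ? op1[i] : op2[i]
+;; which ix86_expand_fp_vcond turns into a min/max where possible and
+;; otherwise into a compare mask combined with and/andnot/or.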
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel floating point logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_andnot<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (and:AVXMODEF2P
+ (not:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "register_operand" "x"))
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vandnp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "<sse>_andnot<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (and:SSEMODEF2P
+ (not:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "andnp<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
+ (plogic:AVX256MODEF2P
+ (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (plogic:AVXMODEF2P
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (plogic:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (plogic:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<MODE>")])
+
+;; Also define scalar versions. These are used for abs, neg, and
+;; conditional move. Using subregs into vector modes causes register
+;; allocation lossage. These patterns do not allow memory operands
+;; because the native instructions read the full 128 bits.
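+;; E.g. fabs clears the sign bit with an AND and negation flips it with a
+;; XOR (an illustrative sketch; the sign-bit masks come from the constant
+;; pool):
+;;   fabs (x)  ->  andps with the 0x7fffffff mask
+;;   -x        ->  xorps with the 0x80000000 mask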
+
+(define_insn "*avx_andnot<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "x"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode)"
+ "vandnp<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*andnot<mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (and:MODEF
+ (not:MODEF
+ (match_operand:MODEF 1 "register_operand" "0"))
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "andnp<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (plogic:MODEF
+ (match_operand:MODEF 1 "register_operand" "x")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "AVX_FLOAT_MODE_P (<MODE>mode)"
+ "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<ssevecmode>")])
+
+(define_insn "*<code><mode>3"
+ [(set (match_operand:MODEF 0 "register_operand" "=x")
+ (plogic:MODEF
+ (match_operand:MODEF 1 "register_operand" "0")
+ (match_operand:MODEF 2 "register_operand" "x")))]
+ "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "<ssevecmode>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSE5 floating point multiply/accumulate instructions. This includes the
+;; scalar versions of the instructions as well as the vector versions.
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
+;; combine to generate a multiply/add with two memory references. We then
+;; split this insn into a load of the destination register from one of the
+;; memory operands, followed by the multiply/add itself. If we don't manage
+;; to split the insn, reload will generate the appropriate moves. The
+;; reason this is needed is that combine has already folded one of the
+;; memory references into both the multiply and add insns, and it can't
+;; generate a new pseudo. I.e.:
+;; (set (reg1) (mem (addr1)))
+;; (set (reg2) (mult (reg1) (mem (addr2))))
+;; (set (reg3) (plus (reg2) (mem (addr3))))
+
+(define_insn "sse5_fmadd<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (plus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
+ "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split fmadd with two memory operands into a load and the fmadd.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (plus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+ emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fmadd
+(define_insn "sse5_vmfmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
+ "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating point multiply and subtract
+;; Allow two memory operands, the same as fmadd
+(define_insn "sse5_fmsub<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
+ "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split fmsub with two memory operands into a load and the fmsub.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+ emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fmsub
+(define_insn "sse5_vmfmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating point negative multiply and add
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
+;; Note that the operands are out of order to simplify the call to
+;; ix86_sse5_valid_op_p
+;; Allow two memory operands to help in optimizing.
+(define_insn "sse5_fnmadd<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
+ (minus:SSEMODEF4
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)"
+ "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split fnmadd with two memory operands into a load and the fnmadd.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (minus:SSEMODEF4
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
+ (mult:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+ emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fnmadd
+(define_insn "sse5_vmfnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
+ "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Floating point negative multiply and subtract
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
+;; Allow two memory operands to help with optimization
+(define_insn "sse5_fnmsub<mode>4"
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (neg:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
+ "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Split fnmsub with two memory operands into a load and the fnmsub.
+(define_split
+ [(set (match_operand:SSEMODEF4 0 "register_operand" "")
+ (minus:SSEMODEF4
+ (mult:SSEMODEF4
+ (neg:SSEMODEF4
+ (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
+ (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
+ emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are generated.
+;; Scalar version of fnmsub
+(define_insn "sse5_vmfnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE5 && TARGET_FUSED_MADD
+ && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)"
+ "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; The same instructions using an UNSPEC to allow the intrinsic to be used
+;; even if the user used -mno-fused-madd.
+;; Parallel instructions. During instruction generation, just default
+;; to registers, and let combine later build the appropriate instruction.
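+;; E.g. (a usage sketch) the _mm_macc_ps intrinsic from bmmintrin.h maps
+;; onto sse5i_fmadd<mode>4: with -mfused-madd it emits the plain
+;; plus/mult form above, while with -mno-fused-madd the UNSPEC form keeps
+;; the multiply and the add fused instead of letting the optimizers tear
+;; them apart.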
+(define_expand "sse5i_fmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
+ "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "sse5i_fmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
+ "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b)
+;; Note that the operands are out of order to simplify the call to
+;; ix86_sse5_valid_op_p
+(define_expand "sse5i_fnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" "")))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
+ "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
+(define_expand "sse5i_fnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" ""))
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_fnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Scalar instructions
+(define_expand "sse5i_vmfmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 1)
+ (const_int 0))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+;; For the scalar operations, use operand1 for the upper words that aren't
+;; modified, so restrict the forms that are accepted.
+(define_insn "*sse5i_vmfmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (plus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 0)
+ (const_int 0))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "sse5i_vmfmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 0)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_vmfmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; Note that the operands are out of order to simplify the call to
+;; ix86_sse5_valid_op_p
+(define_expand "sse5i_vmfnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "register_operand" "")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "")
+ (match_operand:SSEMODEF2P 2 "register_operand" "")))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_vmfnmadd<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
+ (mult:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
+ "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "sse5i_vmfnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" ""))
+ (match_operand:SSEMODEF2P 2 "register_operand" ""))
+ (match_operand:SSEMODEF2P 3 "register_operand" ""))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5"
+{
+ /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC. */
+ if (TARGET_FUSED_MADD)
+ {
+ emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+})
+
+(define_insn "*sse5i_vmfnmsub<mode>4"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
+ (unspec:SSEMODEF2P
+ [(vec_merge:SSEMODEF2P
+ (minus:SSEMODEF2P
+ (mult:SSEMODEF2P
+ (neg:SSEMODEF2P
+ (match_operand:SSEMODEF2P 1 "register_operand" "0,0"))
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
+ (match_dup 1)
+ (const_int 1))]
+ UNSPEC_SSE5_INTRINSIC))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse_cvtpi2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 3)))]
+ "TARGET_SSE"
+ "cvtpi2ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_cvtps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvtps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttps2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_select:V2SI
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "cvttps2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "mode" "SF")])
+
+(define_insn "*avx_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "cvtsi2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "SF")])
+
+(define_insn "*avx_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtsi2ssq"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE && TARGET_64BIT"
+ "cvtsi2ssq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_cvtss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "%vcvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE"
+ "%vcvtss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvtss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "%vcvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvtss2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE && TARGET_64BIT"
+ "%vcvtss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse_cvttss2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE"
+ "%vcvttss2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse_cvttss2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE && TARGET_64BIT"
+ "%vcvttss2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "avx_cvtdq2ps<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTDQ2PS 0 "register_operand" "=x")
+ (float:AVXMODEDCVTDQ2PS
+ (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_cvtdq2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvtdq2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "avx_cvtps2dq<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
+ (unspec:AVXMODEDCVTPS2DQ
+ [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_cvtps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx_cvttps2dq<avxmodesuffix>"
+ [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
+ (fix:AVXMODEDCVTPS2DQ
+ (match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse2_cvttps2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttps2dq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse2_cvtpi2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
+ "TARGET_SSE2"
+ "cvtpi2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx,*")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_cvtpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "cvtpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvttpd2pi"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "cvttpd2pi\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_cvtsi2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsi2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
+
+(define_insn "*avx_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_cvtsi2sdq"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (vec_duplicate:V2DF
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "cvtsi2sdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "mode" "DF")
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
+
+(define_insn "sse2_cvtsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "%vcvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2si_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2"
+ "%vcvtsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI
+ [(vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "%vcvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvtsd2siq_2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "%vcvtsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvttsd2si"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (fix:SI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2"
+ "%vcvttsd2si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
+
+(define_insn "sse2_cvttsd2siq"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (fix:DI
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0)]))))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ "%vcvttsd2siq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DI")
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
+
+(define_insn "avx_cvtdq2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "sse2_cvtdq2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "%vcvtdq2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "avx_cvtpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC))]
+ "TARGET_AVX"
+ "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_expand "sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvtpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FIX_NOTRUNC)
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
+ : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_insn "avx_cvttpd2dq256"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_expand "sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SImode);")
+
+(define_insn "*sse2_cvttpd2dq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SI 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
+ : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_insn "*avx_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse2_cvtsd2ss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
+ (match_operand:V4SF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtsd2ss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "SF")])
+
+(define_insn "*avx_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse2_cvtss2sd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+ (vec_merge:V2DF
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
+ (parallel [(const_int 0) (const_int 1)])))
+ (match_operand:V2DF 1 "register_operand" "0,0")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "cvtss2sd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double")
+ (set_attr "mode" "DF")])
+
+(define_insn "avx_cvtpd2ps256"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (float_truncate:V4SF
+ (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" ""))
+ (match_dup 2)))]
+ "TARGET_SSE2"
+ "operands[2] = CONST0_RTX (V2SFmode);")
+
+(define_insn "*sse2_cvtpd2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
+ (match_operand:V2SF 2 "const0_operand" "")))]
+ "TARGET_SSE2"
+ "* return TARGET_AVX ? \"vcvtpd2ps{x}\t{%1, %0|%0, %1}\"
+ : \"cvtpd2ps\t{%1, %0|%0, %1}\";"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")
+ (set_attr "amdfam10_decode" "double")])
+
+(define_insn "avx_cvtps2pd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "sse2_cvtps2pd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+ "%vcvtps2pd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2DF")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_expand "vec_unpacks_hi_v4sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_dup 2)
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
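+;; An illustrative trace: with operand 1 = { a, b, c, d }, the first
+;; set uses the movhlps-shaped selection 6,7,2,3 to place { c, d } in
+;; the low half of the scratch register, and the cvtps2pd step then
+;; widens them to { (double) c, (double) d }.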
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_unpacks_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(set (match_dup 2)
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SFmode);
+ r2 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ DONE;
+})
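+;; Data flow, for illustration: with operands[1] = { a, b } and
+;; operands[2] = { c, d }, r1 = { (float) a, (float) b, 0, 0 } and
+;; r2 = { (float) c, (float) d, 0, 0 }; movlhps then combines the low
+;; halves into { (float) a, (float) b, (float) c, (float) d }.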
+
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
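+;; Both pack expanders above rely on cvt(t)pd2dq leaving its two
+;; integers in the low 64 bits of the result, so interleaving the low
+;; quadwords with punpcklqdq yields all four packed integers.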
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel single-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse_movhlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*avx_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovhlps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%H2, %1, %0|%0, %1, %H2}
+ vmovhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movhlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))]
+ "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhlps\t{%2, %0|%0, %2}
+ movlps\t{%H2, %0|%0, %H2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_expand "sse_movlhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*avx_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_AVX && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movlhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 4)
+ (const_int 5)])))]
+ "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "avx_unpckhps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX"
+ "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_unpckhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_unpckhps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE"
+ "unpckhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "avx_unpcklps256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)])))]
+ "TARGET_AVX"
+ "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*avx_unpcklps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_unpcklps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE"
+ "unpcklps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+;; These are modeled with the same vec_concat as the others so that we
+;; capture users of shufps that can use the newer movshdup and
+;; movsldup instructions instead.
+(define_insn "avx_movshdup256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1) (const_int 1)
+ (const_int 3) (const_int 3)
+ (const_int 5) (const_int 5)
+ (const_int 7) (const_int 7)])))]
+ "TARGET_AVX"
+ "vmovshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "sse3_movshdup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 1)
+ (const_int 1)
+ (const_int 7)
+ (const_int 7)])))]
+ "TARGET_SSE3"
+ "%vmovshdup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "avx_movsldup256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 0)
+ (const_int 2) (const_int 2)
+ (const_int 4) (const_int 4)
+ (const_int 6) (const_int 6)])))]
+ "TARGET_AVX"
+ "vmovsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "sse3_movsldup"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 0)
+ (const_int 6)
+ (const_int 6)])))]
+ "TARGET_SSE3"
+ "%vmovsldup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "avx_shufps256"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")
+ (match_operand:V8SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12)));
+ DONE;
+})
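+;; Worked example: mask 0x1b = 0b00011011 has fields 3,2,1,0, so the
+;; call becomes gen_avx_shufps256_1 (..., 3, 2, 1+8, 0+8, 3+4, 2+4,
+;; 1+12, 0+12): elements 3,2 of %1 followed by elements 1,0 of %2,
+;; repeated lane-relative in the high 128-bit lane.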
+
+;; Each 2-bit field of the mask selects the same element position in
+;; both 128-bit lanes, so one field picks two elements.
+(define_insn "avx_shufps256_1"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_operand:V8SF 1 "register_operand" "x")
+ (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_8_to_11_operand" "")
+ (match_operand 6 "const_8_to_11_operand" "")
+ (match_operand 7 "const_4_to_7_operand" "")
+ (match_operand 8 "const_4_to_7_operand" "")
+ (match_operand 9 "const_12_to_15_operand" "")
+ (match_operand 10 "const_12_to_15_operand" "")])))]
+ "TARGET_AVX
+ && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+ && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+ && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
+ && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 8) << 4;
+ mask |= (INTVAL (operands[6]) - 8) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_expand "sse_shufps"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
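+;; Worked example: mask 0xe4 = 0b11100100 has fields 0,1,2,3, giving
+;; gen_sse_shufps_v4sf (..., 0, 1, 2+4, 3+4): elements 0,1 from %1
+;; and elements 2,3 from %2.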
+
+(define_insn "*avx_shufps_<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "x")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_shufps_<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "0")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")
+ (match_operand 6 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[3]) << 0;
+ mask |= INTVAL (operands[4]) << 2;
+ mask |= (INTVAL (operands[5]) - 4) << 4;
+ mask |= (INTVAL (operands[6]) - 4) << 6;
+ operands[3] = GEN_INT (mask);
+
+ return "shufps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse_storehps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SSE"
+ "@
+ %vmovhps\t{%1, %0|%0, %1}
+ %vmovhlps\t{%1, %d0|%d0, %1}
+ %vmovlps\t{%H1, %d0|%d0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*avx_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_AVX"
+ "@
+ vmovhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*avx_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX"
+ "@
+ vmovlps\t{%1, %0|%0, %1}
+ vmovaps\t{%1, %0|%0, %1}
+ vmovlps\t{%1, %0, %0|%0, %0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,V2DF,V2SF")])
+
+(define_insn "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "*avx_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "x,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "@
+ vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
+ vmovlps\t{%2, %1, %0|%0, %1, %2}
+ vmovlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "@
+ shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+ movlps\t{%2, %0|%0, %2}
+ movlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "*avx_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*vec_dupv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "x")))]
+ "TARGET_AVX"
+ "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_SSE"
+ "shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_concatv2sf_avx"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " x,x,m, x , m")
+ (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
+ "TARGET_AVX"
+ "@
+ vunpcklps\t{%2, %1, %0|%0, %1, %2}
+ vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
+ vmovss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3,4")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
+;; Although insertps takes a register source, we prefer unpcklps with
+;; a register source since its encoding is shorter.
+(define_insn "*vec_concatv2sf_sse4_1"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,0,m, 0 , m")
+ (match_operand:SF 2 "vector_move_operand" " x,m,C,*ym, C")))]
+ "TARGET_SSE4_1"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ insertps\t{$0x10, %2, %0|%0, %2, 0x10}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_extra" "*,1,*,*,*")
+ (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*vec_concatv2sf_sse"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*vec_concatv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " x,x")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vmovlhps\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_insn "*vec_concatv4sf_sse"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " 0,0")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_SSE"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_init<mode>"
+ [(match_operand:SSEMODE 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*vec_setv4sf_0_avx"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovss\t{%2, %1, %0|%0, %1, %2}
+ vmovss\t{%2, %0|%0, %2}
+ vmovd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "vec_setv4sf_0"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+;; The vec_setv4sf operation is a subset of these patterns.
+(define_insn "*vec_setv4sf_avx"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_AVX"
+ "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+ "insertps\t{%3, %2, %0|%0, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_split
+ [(set (match_operand:V4SF 0 "memory_operand" "")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonmemory_operand" ""))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+ DONE;
+})
+
+(define_expand "vec_set<mode>"
+ [(match_operand:SSEMODE 0 "register_operand" "")
+ (match_operand:<ssescalarmode> 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv4sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
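+;; Extracting element 0 needs no shuffle: the split above simply
+;; rereads the value in SFmode, either as the same hard register in
+;; SFmode or as the low part of the memory operand.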
+
+(define_expand "avx_vextractf128<mode>"
+ [(match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
+ break;
+ case 1:
+ emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
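+;; The immediate of vextractf128 selects a 128-bit half: 0x0 extracts
+;; the low half and 0x1 the high half, matching the vec_extract_lo
+;; and vec_extract_hi patterns below.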
+
+(define_insn "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_<mode>"
+ [(set (match_operand:<avxhalfvecmode> 0 "nonimmediate_operand" "=x,m")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x,x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_v16hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v16hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x,x")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_lo_v32qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_extract_hi_v32qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x,x")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)])))]
+ "TARGET_AVX"
+ "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vextractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn_and_split "*vec_extract_v4sf_mem"
+ [(set (match_operand:SF 0 "register_operand" "=x*rf")
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+ DONE;
+})
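+;; E.g. extracting element 2 of a V4SF in memory becomes a plain
+;; SFmode load from byte offset 2 * 4 == 8.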
+
+(define_expand "vec_extract<mode>"
+ [(match_operand:<ssescalarmode> 0 "register_operand" "")
+ (match_operand:SSEMODE 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_unpckhpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_expand "sse2_unpckhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "*avx_unpckhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vunpckhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
+ vmovhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_insn "sse2_unpckhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpckhpd\t{%2, %0|%0, %2}
+ movlpd\t{%H1, %0|%0, %H1}
+ movhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_insn "avx_movddup256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1))
+ (parallel [(const_int 0) (const_int 2)
+ (const_int 4) (const_int 6)])))]
+ "TARGET_AVX"
+ "vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_insn "*avx_movddup"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovddup\t{%1, %0|%0, %1}
+ #"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*sse3_movddup"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movddup\t{%1, %0|%0, %1}
+ #"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && reload_completed"
+ [(const_int 0)]
+{
+ rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
+ emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+ emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+ DONE;
+})
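+;; A movddup whose result lands in memory needs no shuffle at all:
+;; the split stores the same DFmode register twice, at offsets 0
+;; and 8.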
+
+(define_insn "avx_unpcklpd256"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_AVX"
+ "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_expand "sse2_unpcklpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "*avx_unpcklpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_insn "sse2_unpcklpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_expand "avx_shufpd256"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")
+ (match_operand:V4DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_AVX"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 5 : 4),
+ GEN_INT (mask & 4 ? 3 : 2),
+ GEN_INT (mask & 8 ? 7 : 6)));
+ DONE;
+})
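+;; Worked example: mask 0x5 = 0b0101 gives selectors 1, 4, 3, 6,
+;; i.e. element 1 of %1 and element 0 of %2 in the low lane, and the
+;; lane-relative same pair (selectors 3 and 6) in the high lane.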
+
+(define_insn "avx_shufpd256_1"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_4_to_5_operand" "")
+ (match_operand 5 "const_2_to_3_operand" "")
+ (match_operand 6 "const_6_to_7_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 4) << 1;
+ mask |= (INTVAL (operands[5]) - 2) << 2;
+ mask |= (INTVAL (operands[6]) - 6) << 3;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
+(define_expand "sse2_shufpd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)));
+ DONE;
+})
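+;; E.g. mask 0x1 expands to gen_sse2_shufpd_v2df (..., 1, 2):
+;; element 1 of %1 and element 0 of %2 (index 2 in the V4DF concat).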
+
+(define_expand "vec_extract_even<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_extract_odd<mode>"
+ [(set (match_operand:SSEMODE4S 0 "register_operand" "")
+ (vec_select:SSEMODE4S
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE4S 1 "register_operand" "")
+ (match_operand:SSEMODE4S 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_extract_even<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2")
+
+(define_expand "vec_extract_odd<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2")
+
+;; punpcklqdq and punpckhqdq are shorter than shufpd.
+(define_insn "*avx_punpckhqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_AVX"
+ "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "punpckhqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpcklqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_AVX"
+ "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "punpcklqdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_shufpd_<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "x")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_AVX"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_shufpd_<mode>"
+ [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
+ (vec_select:SSEMODE2D
+ (vec_concat:<ssedoublesizemode>
+ (match_operand:SSEMODE2D 1 "register_operand" "0")
+ (match_operand:SSEMODE2D 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "shufpd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhpd\t{%1, %0|%0, %1}
+ vunpckhpd\t{%1, %1, %0|%0, %1, %1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+(define_insn "sse2_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhpd\t{%1, %0|%0, %1}
+ unpckhpd\t%0, %0
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = adjust_address (operands[1], DFmode, 8);
+})
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "sse2_storelpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ %vmovlpd\t{%1, %0|%0, %1}
+ #
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "V1DF,DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (DFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (DFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_expand "sse2_loadhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o,o,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,0,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,x,*f,r")))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
+
+(define_insn "sse2_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o,o,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0,0,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x,*f,r")))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhpd\t{%2, %0|%0, %2}
+ unpcklpd\t{%2, %0|%0, %2}
+ shufpd\t{$1, %1, %0|%0, %1, 1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
+ (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+(define_expand "sse2_loadlpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+;; Avoid combining registers from different units in a single alternative,
+;; see comment above inline_secondary_memory_needed function in i386.c
+(define_insn "*avx_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,m,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,x,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,x,x,o,0,0,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ vmovsd\t{%2, %0|%0, %2}
+ vmovlpd\t{%2, %1, %0|%0, %1, %2}
+ vmovsd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,fmov,imov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,V1DF,V1DF,DF,DF,DF")])
+
+(define_insn "sse2_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m,m,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x,*f,r")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0,0,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movsd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhpd\t{%H1, %0|%0, %H1}
+ #
+ #
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "register_operand" "")
+ (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+;; Not sure these two are ever used, but it doesn't hurt to have
+;; them. -aoliva
+(define_insn "*vec_extractv2df_1_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_extractv2df_0_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*avx_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,x,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0,o,x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovsd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %1, %0|%0, %1, %2}
+ vmovlpd\t{%2, %0|%0, %2}
+ vmovhps\t{%H1, %2, %0|%0, %2, %H1}
+ vmovhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF")])
+
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhps\t{%H1, %0|%0, %H1}
+ movhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*vec_dupv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE3"
+ "%vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_SSE2"
+ "unpcklpd\t%0, %0"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*vec_concatv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1)))]
+ "TARGET_SSE3"
+ "%vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_concatv2df_avx"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " x,x,m")
+ (match_operand:DF 2 "vector_move_operand" " x,m,C")))]
+ "TARGET_AVX"
+ "@
+ vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ vmovhpd\t{%2, %1, %0|%0, %1, %2}
+ vmovsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "DF,V1DF,DF")])
+
+(define_insn "*vec_concatv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
+ (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
+ "TARGET_SSE"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movsd\t{%1, %0|%0, %1}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (minus:SSEMODEI
+ (match_dup 2)
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>x")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plusminus_mnemonic><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "mulv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")))]
+ "TARGET_SSE2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t[12], op0, op[3];
+ int i;
+
+ if (TARGET_SSE5)
+ {
+ /* On SSE5, we can take advantage of the pperm instruction to pack and
+ unpack the bytes. Unpack data such that we've got a source byte in
+ each low byte of each word. We don't care what goes into the high
+ byte, so put 0 there. */
+ for (i = 0; i < 6; ++i)
+ t[i] = gen_reg_rtx (V8HImode);
+
+ for (i = 0; i < 2; i++)
+ {
+ op[0] = t[i];
+ op[1] = operands[i+1];
+ ix86_expand_sse5_unpack (op, true, true); /* high bytes */
+
+ op[0] = t[i+2];
+ ix86_expand_sse5_unpack (op, true, false); /* low bytes */
+ }
+
+ /* Multiply words. */
+ emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
+ emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
+
+ /* Pack the low byte of each word back into a single xmm */
+ op[0] = operands[0];
+ op[1] = t[5];
+ op[2] = t[4];
+ ix86_expand_sse5_pack (op);
+ DONE;
+ }
+
+ for (i = 0; i < 12; ++i)
+ t[i] = gen_reg_rtx (V16QImode);
+
+ /* Unpack data such that we've got a source byte in each low byte of
+ each word. We don't care what goes into the high byte of each word.
+ Rather than trying to get zero in there, most convenient is to let
+ it be a copy of the low byte. */
+ emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
+ emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
+ emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
+ emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
+
+ /* Multiply words. The end-of-line annotations here give a picture of what
+ the output of that instruction looks like. Dot means don't care; the
+ letters are the bytes of the result with A being the most significant. */
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
+ gen_lowpart (V8HImode, t[0]),
+ gen_lowpart (V8HImode, t[1])));
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
+ gen_lowpart (V8HImode, t[2]),
+ gen_lowpart (V8HImode, t[3])));
+
+ /* Extract the relevant bytes and merge them back together. */
+ emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
+ emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
+ emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
+ emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
+ emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
+ emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
+
+ op0 = operands[0];
+ emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
+ DONE;
+})
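+
+;; Roughly, per byte, the split above computes (an illustrative C sketch
+;; of the semantics, assuming 8-bit wraparound):
+;;
+;;   void mulv16qi (unsigned char r[16],
+;;                  const unsigned char a[16], const unsigned char b[16])
+;;   {
+;;     int i;
+;;     for (i = 0; i < 16; i++)
+;;       r[i] = (unsigned char) (a[i] * b[i]);  /* low 8 bits only */
+;;   }
+;;
+;; Only the low byte of each 16-bit product survives, which is why the
+;; unpack step may leave arbitrary data in the high byte of each word.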
+
+(define_expand "mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmullw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avxv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*avx_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*avx_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1 || TARGET_SSE5)
+ ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
+})
+
+(define_insn "*avx_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmulld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
+;; multiply/add. In general, we expect the define_split to occur before
+;; register allocation, so we have to handle the corner case where the target
+;; is the same as one of the inputs.
+(define_insn_and_split "*sse5_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=&x")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE5"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V4SI (mult:V4SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V4SImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*sse2_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V4SImode);
+ t5 = gen_reg_rtx (V4SImode);
+ t6 = gen_reg_rtx (V4SImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply elements 2 and 0. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
+ op1, op2));
+
+ /* Shift both input vectors down one element, so that elements 3
+ and 1 are now in the slots for elements 2 and 0. For K8, at
+ least, this is faster than using a shuffle. */
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, op1),
+ thirtytwo));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, op2),
+ thirtytwo));
+ /* Multiply elements 3 and 1. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
+ t2, t3));
+
+ /* Move the results in element 2 down to element 1; we don't care
+ what goes in elements 2 and 3. */
+ emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+ emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+
+ /* Merge the parts back together. */
+ emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+ DONE;
+})
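+
+;; Net effect of the split above, per 32-bit lane (illustrative C):
+;;
+;;   unsigned int mul32 (unsigned int a, unsigned int b)
+;;   {
+;;     return (unsigned int) ((unsigned long long) a * b);  /* low half */
+;;   }
+;;
+;; pmuludq supplies the products of lanes {0,2}; the 32-bit whole-register
+;; shifts expose lanes {1,3} for a second pmuludq; pshufd and punpckldq
+;; then gather the four low halves.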
+
+(define_insn_and_split "mulv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ if (TARGET_SSE5)
+ {
+ /* op1: A,B,C,D, op2: E,F,G,H */
+ op0 = operands[0];
+ op1 = gen_lowpart (V4SImode, operands[1]);
+ op2 = gen_lowpart (V4SImode, operands[2]);
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V2DImode);
+ t5 = gen_reg_rtx (V2DImode);
+
+ /* t1: B,A,D,C */
+ emit_insn (gen_sse2_pshufd_1 (t1, op1,
+ GEN_INT (1),
+ GEN_INT (0),
+ GEN_INT (3),
+ GEN_INT (2)));
+
+ /* t2: 0 */
+ emit_move_insn (t2, CONST0_RTX (V4SImode));
+
+ /* t3: (B*E),(A*F),(D*G),(C*H) */
+ emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
+
+ /* t4: (B*E)+(A*F), (D*G)+(C*H) */
+ emit_insn (gen_sse5_phadddq (t4, t3));
+
+ /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
+ emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32)));
+
+ /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
+ emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5));
+ DONE;
+ }
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V2DImode);
+ t2 = gen_reg_rtx (V2DImode);
+ t3 = gen_reg_rtx (V2DImode);
+ t4 = gen_reg_rtx (V2DImode);
+ t5 = gen_reg_rtx (V2DImode);
+ t6 = gen_reg_rtx (V2DImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply low parts. */
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
+ gen_lowpart (V4SImode, op2)));
+
+ /* Shift input vectors right 32 bits so we can multiply high parts. */
+ emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
+ emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
+
+ /* Multiply high parts by low parts. */
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
+ gen_lowpart (V4SImode, t3)));
+ emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
+ gen_lowpart (V4SImode, t2)));
+
+ /* Shift them back. */
+ emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
+ emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
+
+ /* Add the three parts together. */
+ emit_insn (gen_addv2di3 (t6, t1, t4));
+ emit_insn (gen_addv2di3 (op0, t6, t5));
+ DONE;
+})
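+
+;; Both the SSE5 and the generic path above implement the standard
+;; double-word identity: with a = ah*2^32 + al and b = bh*2^32 + bl,
+;; modulo 2^64,
+;;
+;;   a * b = al*bl + ((al*bh + ah*bl) << 32)
+;;
+;; or, per lane (illustrative C):
+;;
+;;   unsigned long long mul64 (unsigned long long a, unsigned long long b)
+;;   {
+;;     unsigned long long al = a & 0xffffffffULL, ah = a >> 32;
+;;     unsigned long long bl = b & 0xffffffffULL, bh = b >> 32;
+;;     return al * bl + ((al * bh + ah * bl) << 32);
+;;   }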
+
+(define_expand "vec_widen_smult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
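+
+;; This and the three v8hi widening expanders that follow combine pmullw
+;; with pmulhw/pmulhuw and interleave the halves into full 32-bit
+;; products; per element pair (illustrative C, signed case):
+;;
+;;   int widen_mul (short a, short b)
+;;   {
+;;     unsigned short lo = (unsigned short) (a * b);
+;;     short hi = (short) (((int) a * (int) b) >> 16);
+;;     return ((int) hi << 16) | lo;
+;;   }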
+
+(define_expand "vec_widen_smult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ rtx t1, t2;
+
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
+ GEN_INT (0),
+ GEN_INT (2),
+ GEN_INT (1),
+ GEN_INT (3)));
+ emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "sdot_prodv8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")
+ (match_operand:V4SI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
+ emit_insn (gen_addv4si3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_expand "udot_prodv4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")
+ (match_operand:V2DI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t1, t2, t3, t4;
+
+ t1 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
+ emit_insn (gen_addv2di3 (t1, t1, operands[3]));
+
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (32)));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, operands[2]),
+ GEN_INT (32)));
+
+ t4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
+
+ emit_insn (gen_addv2di3 (operands[0], t1, t4));
+ DONE;
+})
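+
+;; With no widening multiply-accumulate available, udot_prodv4si above
+;; handles lanes {0,2} with one pmuludq, exposes lanes {1,3} via 32-bit
+;; whole-register shifts for a second pmuludq, and sums both with the
+;; accumulator; roughly (illustrative C):
+;;
+;;   void udot (unsigned long long r[2], const unsigned int a[4],
+;;              const unsigned int b[4], const unsigned long long acc[2])
+;;   {
+;;     r[0] = acc[0] + (unsigned long long) a[0] * b[0]
+;;                   + (unsigned long long) a[1] * b[1];
+;;     r[1] = acc[1] + (unsigned long long) a[2] * b[2]
+;;                   + (unsigned long long) a[3] * b[3];
+;;   }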
+
+(define_insn "*avx_ashr<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (ashiftrt:SSEMODE24
+ (match_operand:SSEMODE24 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (ashiftrt:SSEMODE24
+ (match_operand:SSEMODE24 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_SSE2"
+ "psra<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_lshr<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (lshiftrt:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (lshiftrt:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_SSE2"
+ "psrl<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_ashl<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (ashift:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "x")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_AVX"
+ "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
+ (ashift:SSEMODE248
+ (match_operand:SSEMODE248 1 "register_operand" "0")
+ (match_operand:SI 2 "nonmemory_operand" "xN")))]
+ "TARGET_SSE2"
+ "psll<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "vec_shl_<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_lowpart (TImode, operands[0]);
+ operands[1] = gen_lowpart (TImode, operands[1]);
+})
+
+(define_expand "vec_shr_<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_lowpart (TImode, operands[0]);
+ operands[1] = gen_lowpart (TImode, operands[1]);
+})
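+
+;; vec_shl_<mode> and vec_shr_<mode> view the whole vector as one TImode
+;; value and shift it by a multiple of 8 bits, which maps onto the SSE2
+;; pslldq/psrldq byte shifts; hence both operands are punned to TImode
+;; with gen_lowpart.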
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (maxmin:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (umaxmin:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "")
+ (match_operand:V16QI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
+
+(define_insn "*<code>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (umaxmin:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
+ "p<maxminiprefix>b\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code>v8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (smaxmin:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
+
+(define_insn "*<code>v8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (smaxmin:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
+ "p<maxminiprefix>w\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umaxv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
+ else
+ {
+ rtx op0 = operands[0], op2 = operands[2], op3 = op0;
+ if (rtx_equal_p (op3, op2))
+ op3 = gen_reg_rtx (V8HImode);
+ emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
+ emit_insn (gen_addv8hi3 (op0, op3, op2));
+ DONE;
+ }
+})
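+
+;; The pre-SSE4.1 fallback above relies on the saturating-subtract
+;; identity umax (a, b) = satsub (a, b) + b: psubusw yields a - b when
+;; a > b and 0 otherwise.  Per element (illustrative C):
+;;
+;;   unsigned short umax (unsigned short a, unsigned short b)
+;;   {
+;;     unsigned short d = (a > b) ? a - b : 0;  /* saturating subtract */
+;;     return d + b;                            /* a if a > b, else b */
+;;   }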
+
+(define_expand "smax<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_insn "*sse4_1_<code><mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
+ (smaxmin:SSEMODE14
+ (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umaxv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_insn "*sse4_1_<code><mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (umaxmin:SSEMODE24
+ (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<maxminiprefix><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "smin<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "")
+ (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
+ (match_operand:SSEMODE14 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_expand "umin<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "")
+ (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
+ (match_operand:SSEMODE24 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
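+
+;; Without SSE4.1, the smax/umax/smin/umin expanders above all funnel
+;; into ix86_expand_int_vcond, i.e. a compare-and-blend sequence;
+;; roughly, per element (illustrative C for smax):
+;;
+;;   int smax (int a, int b)
+;;   {
+;;     int mask = -(a > b);          /* pcmpgt: all ones or all zeros */
+;;     return (a & mask) | (b & ~mask);
+;;   }
+;;
+;; The min variants swap the blend arms, and the unsigned ones compare
+;; with GTU.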
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral comparisons
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse2_eq<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "")
+ (eq:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2 && !TARGET_SSE5"
+ "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "*avx_eq<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (eq:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_eq<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (eq:SSEMODE124
+ (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && !TARGET_SSE5
+ && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+ "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse4_1_eqv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (eq:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "")
+ (match_operand:V2DI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE4_1"
+ "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
+
+(define_insn "*sse4_1_eqv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (eq:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
+ "pcmpeqq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_gt<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (gt:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_gt<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (gt:SSEMODE124
+ (match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && !TARGET_SSE5"
+ "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_gtv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (gt:V2DI
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_2"
+ "pcmpgtq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_expand "vcond<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (if_then_else:SSEMODEI
+ (match_operator 3 ""
+ [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODEI 1 "general_operand" "")
+ (match_operand:SSEMODEI 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (ix86_expand_int_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "vcondu<mode>"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (if_then_else:SSEMODEI
+ (match_operator 3 ""
+ [(match_operand:SSEMODEI 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODEI 1 "general_operand" "")
+ (match_operand:SSEMODEI 2 "general_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (ix86_expand_int_vcond (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel bitwise logical operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_dup 2)))]
+ "TARGET_SSE2"
+{
+ int i, n = GET_MODE_NUNITS (<MODE>mode);
+ rtvec v = rtvec_alloc (n);
+
+ for (i = 0; i < n; ++i)
+ RTVEC_ELT (v, i) = constm1_rtx;
+
+ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
+})
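+
+;; There is no vector NOT instruction, so one's complement is expanded
+;; as an XOR against an all-ones vector (the constm1_rtx CONST_VECTOR
+;; built above): elementwise, ~x == x ^ -1.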
+
+(define_insn "*avx_andnot<mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (and:AVX256MODEI
+ (not:AVX256MODEI (match_operand:AVX256MODEI 1 "register_operand" "x"))
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vandnps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecpsmode>")])
+
+(define_insn "*sse_andnot<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "(TARGET_SSE && !TARGET_SSE2)"
+ "andnps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_andnot<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "x"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vpandn\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_andnot<mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (and:SSEMODEI
+ (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*andnottf3"
+ [(set (match_operand:TF 0 "register_operand" "=x")
+ (and:TF
+ (not:TF (match_operand:TF 1 "register_operand" "0"))
+ (match_operand:TF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2"
+ "pandn\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (plogic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
+ (plogic:AVX256MODEI
+ (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecpsmode>")])
+
+(define_insn "*sse_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plogic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "(TARGET_SSE && !TARGET_SSE2)
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "<plogicprefix>ps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*avx_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plogic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX
+ && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_<code><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plogic:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plogicprefix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "<code>tf3"
+ [(set (match_operand:TF 0 "register_operand" "")
+ (plogic:TF
+ (match_operand:TF 1 "nonimmediate_operand" "")
+ (match_operand:TF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
+
+(define_insn "*<code>tf3"
+ [(set (match_operand:TF 0 "register_operand" "=x")
+ (plogic:TF
+ (match_operand:TF 1 "nonimmediate_operand" "%0")
+ (match_operand:TF 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
+ "p<plogicprefix>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Reduce:
+;; op1 = abcdefghijklmnop
+;; op2 = qrstuvwxyz012345
+;; h1 = aqbrcsdteufvgwhx
+;; l1 = iyjzk0l1m2n3o4p5
+;; h2 = aiqybjrzcks0dlt1
+;; l2 = emu2fnv3gow4hpx5
+;; h3 = aeimquy2bfjnrvz3
+;; l3 = cgkosw04dhlptx15
+;; result = bdfhjlnprtvxz135
+(define_expand "vec_pack_trunc_v8hi"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, h1, l1, h2, l2, h3, l3;
+
+ if (TARGET_SSE5)
+ {
+ ix86_expand_sse5_pack (operands);
+ DONE;
+ }
+
+ op1 = gen_lowpart (V16QImode, operands[1]);
+ op2 = gen_lowpart (V16QImode, operands[2]);
+ h1 = gen_reg_rtx (V16QImode);
+ l1 = gen_reg_rtx (V16QImode);
+ h2 = gen_reg_rtx (V16QImode);
+ l2 = gen_reg_rtx (V16QImode);
+ h3 = gen_reg_rtx (V16QImode);
+ l3 = gen_reg_rtx (V16QImode);
+
+ emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
+ emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
+ emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
+ emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
+ emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
+ DONE;
+})
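+
+;; Net effect of this expander and of the narrower vec_pack_trunc
+;; variants below (illustrative C for the V8HI case):
+;;
+;;   void pack_trunc (unsigned char r[16],
+;;                    const unsigned short a[8], const unsigned short b[8])
+;;   {
+;;     int i;
+;;     for (i = 0; i < 8; i++)
+;;       {
+;;         r[i]     = (unsigned char) a[i];
+;;         r[i + 8] = (unsigned char) b[i];
+;;       }
+;;   }
+;;
+;; Each interleave round doubles the stride between surviving low bytes,
+;; as the letter diagram above traces.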
+
+;; Reduce:
+;; op1 = abcdefgh
+;; op2 = ijklmnop
+;; h1 = aibjckdl
+;; l1 = emfngohp
+;; h2 = aeimbfjn
+;; l2 = cgkodhlp
+;; result = bdfhjlnp
+(define_expand "vec_pack_trunc_v4si"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, h1, l1, h2, l2;
+
+ if (TARGET_SSE5)
+ {
+ ix86_expand_sse5_pack (operands);
+ DONE;
+ }
+
+ op1 = gen_lowpart (V8HImode, operands[1]);
+ op2 = gen_lowpart (V8HImode, operands[2]);
+ h1 = gen_reg_rtx (V8HImode);
+ l1 = gen_reg_rtx (V8HImode);
+ h2 = gen_reg_rtx (V8HImode);
+ l2 = gen_reg_rtx (V8HImode);
+
+ emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
+ DONE;
+})
+
+;; Reduce:
+;; op1 = abcd
+;; op2 = efgh
+;; h1 = aebf
+;; l1 = cgdh
+;; result = bdfh
+(define_expand "vec_pack_trunc_v2di"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, h1, l1;
+
+ if (TARGET_SSE5)
+ {
+ ix86_expand_sse5_pack (operands);
+ DONE;
+ }
+
+ op1 = gen_lowpart (V4SImode, operands[1]);
+ op2 = gen_lowpart (V4SImode, operands[2]);
+ h1 = gen_reg_rtx (V4SImode);
+ l1 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_lowv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (vec_select:V2DI
+ (vec_concat:V4DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+{
+ emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "vec_interleave_highv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_interleave_lowv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "")
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand:V4SF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE")
+
+(define_expand "vec_interleave_highv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2")
+
+(define_expand "vec_interleave_lowv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2")
+
+(define_insn "*avx_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x"))
+ (ss_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpacksswb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packsswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (ss_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "0"))
+ (ss_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packsswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x"))
+ (ss_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackssdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packssdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (ss_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (ss_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packssdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "x"))
+ (us_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackuswb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_packuswb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_concat:V16QI
+ (us_truncate:V8QI
+ (match_operand:V8HI 1 "register_operand" "0"))
+ (us_truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE2"
+ "packuswb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpckhbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_AVX"
+ "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 8) (const_int 24)
+ (const_int 9) (const_int 25)
+ (const_int 10) (const_int 26)
+ (const_int 11) (const_int 27)
+ (const_int 12) (const_int 28)
+ (const_int 13) (const_int 29)
+ (const_int 14) (const_int 30)
+ (const_int 15) (const_int 31)])))]
+ "TARGET_SSE2"
+ "punpckhbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpcklbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_AVX"
+ "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_select:V16QI
+ (vec_concat:V32QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 16)
+ (const_int 1) (const_int 17)
+ (const_int 2) (const_int 18)
+ (const_int 3) (const_int 19)
+ (const_int 4) (const_int 20)
+ (const_int 5) (const_int 21)
+ (const_int 6) (const_int 22)
+ (const_int 7) (const_int 23)])))]
+ "TARGET_SSE2"
+ "punpcklbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpckhwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_AVX"
+ "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 4) (const_int 12)
+ (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14)
+ (const_int 7) (const_int 15)])))]
+ "TARGET_SSE2"
+ "punpckhwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpcklwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_AVX"
+ "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpcklwd"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (vec_concat:V16HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 8)
+ (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10)
+ (const_int 3) (const_int 11)])))]
+ "TARGET_SSE2"
+ "punpcklwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpckhdq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckhdq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_SSE2"
+ "punpckhdq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_punpckldq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_punpckldq"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_SSE2"
+ "punpckldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pinsr<avxmodesuffixs>"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (vec_merge:SSEMODE124
+ (vec_duplicate:SSEMODE124
+ (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
+ (match_operand:SSEMODE124 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
+ "TARGET_AVX"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pinsrb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (match_operand:QI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pinsrw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (vec_duplicate:V8HI
+ (match_operand:HI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
+ "TARGET_SSE2"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
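+
+;; In the pinsr patterns the vec_merge mask (operand 3) is a single-bit
+;; mask 1 << N selecting the element to replace; exact_log2 recovers N
+;; for the instruction's immediate.  A pinsrw mask of 8 (1 << 3), for
+;; example, becomes the immediate $3 and replaces word 3.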
+
+;; This pattern must come before sse2_loadld, since it is the preferred form.
+(define_insn "*sse4_1_pinsrd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_AVX && TARGET_64BIT"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_SSE4_1 && TARGET_64BIT"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
+ "TARGET_SSE4_1"
+ "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb_memory"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pextrw"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
+ "TARGET_SSE2"
+ "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrw_memory"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; This pattern must come before *vec_extractv2di_1_sse, since it is the preferred form.
+(define_insn "*sse4_1_pextrq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+ "TARGET_SSE4_1 && TARGET_64BIT"
+ "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshufd"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
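+
+;; The expander above splits the 8-bit pshufd immediate into four 2-bit
+;; element selectors.  A mask of 0x1b (0b00011011), for example, yields
+;; the selectors 3, 2, 1, 0, i.e. it reverses the four elements.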
+
+(define_insn "sse2_pshufd_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
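+
+;; sse2_pshufd_1 performs the inverse packing of the four selectors back
+;; into one immediate; selectors {1, 0, 3, 2} reassemble to
+;; 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1.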
+
+(define_expand "sse2_pshuflw"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+(define_insn "sse2_pshuflw_1"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")
+ (match_operand 3 "const_0_to_3_operand" "")
+ (match_operand 4 "const_0_to_3_operand" "")
+ (match_operand 5 "const_0_to_3_operand" "")
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pshufhw"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4)));
+ DONE;
+})
+
+(define_insn "sse2_pshufhw_1"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (match_operand 2 "const_4_to_7_operand" "")
+ (match_operand 3 "const_4_to_7_operand" "")
+ (match_operand 4 "const_4_to_7_operand" "")
+ (match_operand 5 "const_4_to_7_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask = 0;
+ mask |= (INTVAL (operands[2]) - 4) << 0;
+ mask |= (INTVAL (operands[3]) - 4) << 2;
+ mask |= (INTVAL (operands[4]) - 4) << 4;
+ mask |= (INTVAL (operands[5]) - 4) << 6;
+ operands[2] = GEN_INT (mask);
+
+ return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_rep" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
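+
+;; In sse2_pshufhw_1 the selectors are biased by 4 because vec_select
+;; indices count all eight words while the hardware immediate is
+;; relative to the high half; a selector of 5 is encoded as 5 - 4 = 1.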
+
+(define_expand "sse2_loadd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "nonimmediate_operand" ""))
+ (match_dup 2)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "operands[2] = CONST0_RTX (V4SImode);")
+
+(define_insn "*avx_loadld"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "@
+ vmovd\t{%2, %0|%0, %2}
+ vmovd\t{%2, %0|%0, %2}
+ vmovss\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,TI,V4SF")])
+
+(define_insn "sse2_loadld"
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movd\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "TI,TI,V4SF,SF")])
+
+(define_insn_and_split "sse2_stored"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x,Yi")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "#"
+ "&& reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
+})
+
+(define_insn_and_split "*vec_ext_v4si_mem"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (vec_select:SI
+ (match_operand:V4SI 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
+ DONE;
+})
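+
+;; The split above reduces an in-memory element extract to a plain
+;; SImode load at byte offset i*4; extracting element 2, for example,
+;; becomes a load at offset 8.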
+
+(define_expand "sse_storeq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "")
+
+(define_insn "*sse2_storeq_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,*r,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
+ (parallel [(const_int 0)])))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ #
+ #
+ %vmov{q}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "*,*,imov")
+ (set_attr "prefix" "*,*,maybe_vex")
+ (set_attr "mode" "*,*,DI")])
+
+(define_insn "*sse2_storeq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE"
+ "#")
+
+(define_split
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE
+ && reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
+})
+
+(define_insn "*vec_extractv2di_1_rex64_avx"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_64BIT
+ && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhps\t{%1, %0|%0, %1}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vmovq\t{%H1, %0|%0, %H1}
+ vmov{q}\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+ (set_attr "memory" "*,none,*,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,TI,TI,DI")])
+
+(define_insn "*vec_extractv2di_1_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ psrldq\t{$8, %0|%0, 8}
+ movq\t{%H1, %0|%0, %H1}
+ mov{q}\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+ (set_attr "memory" "*,none,*,*")
+ (set_attr "mode" "V2SF,TI,TI,DI")])
+
+(define_insn "*vec_extractv2di_1_avx"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_64BIT
+ && TARGET_AVX
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ vmovhps\t{%1, %0|%0, %1}
+ vpsrldq\t{$8, %1, %0|%0, %1, 8}
+ vmovq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov")
+ (set_attr "memory" "*,none,*")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V2SF,TI,TI")])
+
+(define_insn "*vec_extractv2di_1_sse2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_64BIT
+ && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ psrldq\t{$8, %0|%0, 8}
+ movq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov")
+ (set_attr "memory" "*,none,*")
+ (set_attr "mode" "V2SF,TI,TI")])
+
+;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
+(define_insn "*vec_extractv2di_1_sse"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_dupv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "register_operand" " Y2,0")))]
+ "TARGET_SSE"
+ "@
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "maybe_vex,orig")
+ (set_attr "mode" "TI,V4SF")])
+
+(define_insn "*vec_dupv2di_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "register_operand" "x")))]
+ "TARGET_AVX"
+ "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
+ (vec_duplicate:V2DI
+ (match_operand:DI 1 "register_operand" " 0 ,0")))]
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t%0, %0
+ movlhps\t%0, %0"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "TI,V4SF")])
+
+(define_insn "*vec_concatv2si_avx"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" "x ,x,rm, 0 ,rm")
+ (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
+ "TARGET_AVX"
+ "@
+ vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vpunpckldq\t{%2, %1, %0|%0, %1, %2}
+ vmovd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3,4")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,DI,DI")])
+
+(define_insn "*vec_concatv2si_sse4_1"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,x ,*y ,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" "0 ,0,rm, 0 ,rm")
+ (match_operand:SI 2 "vector_move_operand" "rm,x,C ,*ym,C")))]
+ "TARGET_SSE4_1"
+ "@
+ pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "prefix_extra" "1,*,*,*,*")
+ (set_attr "mode" "TI,TI,TI,DI,DI")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*vec_concatv2si_sse2"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
+ (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
+ "TARGET_SSE2"
+ "@
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "TI,TI,DI,DI")])
+
+(define_insn "*vec_concatv2si_sse"
+ [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SI
+ (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
+ (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,V4SF,DI,DI")])
+
+(define_insn "*vec_concatv4si_1_avx"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" " x,x")
+ (match_operand:V2SI 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_AVX"
+ "@
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI,V2SF")])
+
+(define_insn "*vec_concatv4si_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
+ (vec_concat:V4SI
+ (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
+ (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
+ "TARGET_SSE"
+ "@
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "TI,V4SF,V2SF")])
+
+(define_insn "*vec_concatv2di_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " m,*y,x,x")
+ (match_operand:DI 2 "vector_move_operand" " C, C,x,m")))]
+ "!TARGET_64BIT && TARGET_AVX"
+ "@
+ vmovq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemov,ssemov,sselog,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "1")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,V2SF")])
+
+(define_insn "vec_concatv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,?Y2,Y2,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " mY2,*y ,0 ,0,0")
+ (match_operand:DI 2 "vector_move_operand" " C , C,Y2,x,m")))]
+ "!TARGET_64BIT && TARGET_SSE"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
+
+(define_insn "*vec_concatv2di_rex64_avx"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,Yi,!x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " x,m,r ,*y,x,x")
+ (match_operand:DI 2 "vector_move_operand" "rm,C,C ,C ,x,m")))]
+ "TARGET_64BIT && TARGET_AVX"
+ "@
+ vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
+ vmovq\t{%1, %0|%0, %1}
+ vmovq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
+ vmovhps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
+ (set (attr "prefix")
+ (if_then_else (eq_attr "alternative" "3")
+ (const_string "orig")
+ (const_string "vex")))
+ (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
+
+(define_insn "*vec_concatv2di_rex64_sse4_1"
+ [(set (match_operand:V2DI 0 "register_operand" "=x ,x ,Yi,!x,x,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " 0 ,mx,r ,*y,0,0,0")
+ (match_operand:DI 2 "vector_move_operand" " rm,C ,C ,C ,x,x,m")))]
+ "TARGET_64BIT && TARGET_SSE4_1"
+ "@
+ pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "prefix_extra" "1,*,*,*,*,*,*")
+ (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
+
+(define_insn "*vec_concatv2di_rex64_sse"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
+ (vec_concat:V2DI
+ (match_operand:DI 1 "nonimmediate_operand" " mY2,r ,*y ,0 ,0,0")
+ (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
+ "TARGET_64BIT && TARGET_SSE"
+ "@
+ movq\t{%1, %0|%0, %1}
+ movq\t{%1, %0|%0, %1}
+ movq2dq\t{%1, %0|%0, %1}
+ punpcklqdq\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
+
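+;; In the vec_unpack expanders below, the second argument of the ix86
+;; helpers selects unsigned (zero-extending) versus signed
+;; (sign-extending) widening and the third selects the high or low half
+;; of the source vector.
+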
+(define_expand "vec_unpacku_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else if (TARGET_SSE5)
+ ix86_expand_sse5_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Miscellaneous
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse2_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" ""))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "")))
+ (const_vector:V16QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
+
+(define_insn "*avx_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+ "vpavgb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_uavgv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (truncate:V16QI
+ (lshiftrt:V16HI
+ (plus:V16HI
+ (plus:V16HI
+ (zero_extend:V16HI
+ (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V16HI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V16QI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
+ "pavgb\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
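+
+;; The RTL above computes the rounding average (a + b + 1) >> 1 in a
+;; widened mode; pavgb of 1 and 2, for instance, yields
+;; (1 + 2 + 1) >> 1 = 2.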
+
+(define_expand "sse2_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
+
+(define_insn "*avx_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+ "vpavgw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_uavgv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (plus:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_vector:V8HI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
+ "pavgw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+;; The exact RTL representation of psadbw is absolutely enormous, and
+;; surely not generally useful, so an unspec is used instead.
+(define_insn "*avx_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_AVX"
+ "vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse2_psadbw"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSADBW))]
+ "TARGET_SSE2"
+ "psadbw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
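+
+;; psadbw sums the absolute differences of the two byte vectors in
+;; 8-byte groups, leaving a 16-bit sum in the low word of each 64-bit
+;; half of the destination.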
+
+(define_insn "avx_movmskp<avxmodesuffixf2c>256"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:AVX256MODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "vmovmskp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sse>_movmskp<ssemodesuffixf2c>"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+ "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse2_pmovmskb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "%vpmovmskb\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
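+
+;; pmovmskb collects the sign bit of each of the 16 source bytes into
+;; the low 16 bits of the destination; an all-0xff source yields 0xffff.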
+
+(define_expand "sse2_maskmovdqu"
+ [(set (match_operand:V16QI 0 "memory_operand" "")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")
+ (match_dup 0)]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2"
+ "")
+
+(define_insn "*sse2_maskmovdqu"
+ [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (mem:V16QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2 && !TARGET_64BIT"
+ ;; @@@ Check the operand ordering for Intel versus non-Intel syntax.
+ "%vmaskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_maskmovdqu_rex64"
+ [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "register_operand" "x")
+ (mem:V16QI (match_dup 0))]
+ UNSPEC_MASKMOV))]
+ "TARGET_SSE2 && TARGET_64BIT"
+ ;; @@@ Check the operand ordering for Intel versus non-Intel syntax.
+ "%vmaskmovdqu\t{%2, %1|%1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse_ldmxcsr"
+ [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
+ UNSPECV_LDMXCSR)]
+ "TARGET_SSE"
+ "%vldmxcsr\t%0"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "load")])
+
+(define_insn "sse_stmxcsr"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
+ "TARGET_SSE"
+ "%vstmxcsr\t%0"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "store")])
+
+(define_expand "sse_sfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse_sfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
+ "TARGET_SSE || TARGET_3DNOW_A"
+ "sfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_insn "sse2_clflush"
+ [(unspec_volatile [(match_operand 0 "address_operand" "p")]
+ UNSPECV_CLFLUSH)]
+ "TARGET_SSE2"
+ "clflush\t%a0"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_mfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_mfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ "TARGET_64BIT || TARGET_SSE2"
+ "mfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_expand "sse2_lfence"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+(define_insn "*sse2_lfence"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
+ "TARGET_SSE2"
+ "lfence"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "unknown")])
+
+(define_insn "sse3_mwait"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")]
+ UNSPECV_MWAIT)]
+ "TARGET_SSE3"
+;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits
+;; are used.  Since 32-bit register operands are implicitly zero-extended
+;; to 64 bits, we only need to set up the 32-bit registers.
+ "mwait"
+ [(set_attr "length" "3")])
+
+(define_insn "sse3_monitor"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && !TARGET_64BIT"
+ "monitor\t%0, %1, %2"
+ [(set_attr "length" "3")])
+
+(define_insn "sse3_monitor64"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && TARGET_64BIT"
+;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower
+;; 32 bits of RCX and RDX are used.  Since 32-bit register operands are
+;; implicitly zero-extended to 64 bits, we only need to set up the
+;; 32-bit registers.
+ "monitor"
+ [(set_attr "length" "3")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSSE3 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "*avx_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphaddw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
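+
+;; The vec_concat/vec_select tree above spells out phaddw's horizontal
+;; semantics: result word i is op1[2i] + op1[2i+1] for i < 4 and
+;; op2[2(i-4)] + op2[2(i-4)+1] for i >= 4.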
+
+(define_insn "ssse3_phaddwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vphaddd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phadddv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phadddv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (plus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSSE3"
+ "phaddd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phaddswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_plus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_plus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phaddsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphsubw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubwv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubwv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_AVX"
+ "vphsubd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubdv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_concat:V4SI
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubdv2si3"
+ [(set (match_operand:V2SI 0 "register_operand" "=y")
+ (vec_concat:V2SI
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
+ (minus:SI
+ (vec_select:SI
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
+ "TARGET_SSSE3"
+ "phsubd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_AVX"
+ "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
+ "TARGET_SSSE3"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_phsubswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (vec_concat:V4HI
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 1 "register_operand" "0")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
+ (vec_concat:V2HI
+ (ss_minus:HI
+ (vec_select:HI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
+ (ss_minus:HI
+ (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
+ (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
+ "TARGET_SSSE3"
+ "phsubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_pmaddubsw128"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_AVX"
+ "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pmaddubsw128"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (ss_plus:V8HI
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)]))))
+ (mult:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)]))))))]
+ "TARGET_SSSE3"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pmaddubsw"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (ss_plus:V4HI
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 1 "register_operand" "0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4HI
+ (vec_select:V4QI
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4HI
+ (zero_extend:V4HI
+ (vec_select:V4QI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4HI
+ (vec_select:V4QI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSSE3"
+ "pmaddubsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_expand "ssse3_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 14))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*avx_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*ssse3_pmulhrswv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (plus:V8SI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 14))
+ (const_vector:V8SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "ssse3_pmulhrswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "")))
+ (const_int 14))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
+(define_insn "*ssse3_pmulhrswv4hi3"
+ [(set (match_operand:V4HI 0 "register_operand" "=y")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (plus:V4SI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (const_int 14))
+ (const_vector:V4SI [(const_int 1) (const_int 1)
+ (const_int 1) (const_int 1)]))
+ (const_int 1))))]
+ "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+ "pmulhrsw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_AVX"
+ "vpshufb\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pshufbv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSHUFB))]
+ "TARGET_SSSE3"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSHUFB))]
+ "TARGET_SSSE3"
+ "pshufb\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124
+ [(match_operand:SSEMODE124 1 "register_operand" "x")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_AVX"
+ "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_psign<mode>3"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (unspec:SSEMODE124
+ [(match_operand:SSEMODE124 1 "register_operand" "0")
+ (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
+ UNSPEC_PSIGN))]
+ "TARGET_SSSE3"
+ "psign<ssevecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_psign<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (unspec:MMXMODEI
+ [(match_operand:MMXMODEI 1 "register_operand" "0")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ UNSPEC_PSIGN))]
+ "TARGET_SSSE3"
+ "psign<mmxvecsize>\t{%2, %0|%0, %2}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "*avx_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "x")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_AVX"
+{
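+ /* Operand 3 is checked as a multiple of 8 bits; palignr takes the
+ shift count in bytes, so scale it down here.  */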
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_palignrti"
+ [(set (match_operand:TI 0 "register_operand" "=x")
+ (unspec:TI [(match_operand:TI 1 "register_operand" "0")
+ (match_operand:TI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_SSSE3"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "ssse3_palignrdi"
+ [(set (match_operand:DI 0 "register_operand" "=y")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "nonimmediate_operand" "ym")
+ (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+ UNSPEC_PALIGNR))]
+ "TARGET_SSSE3"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+ return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+ (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSSE3"
+ "%vpabs<ssevecsize>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+ (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
+ "TARGET_SSSE3"
+ "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4a_movnt<mode>"
+ [(set (match_operand:MODEF 0 "memory_operand" "=m")
+ (unspec:MODEF
+ [(match_operand:MODEF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movnts<ssemodefsuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4a_vmmovnt<mode>"
+ [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+ (unspec:<ssescalarmode>
+ [(vec_select:<ssescalarmode>
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movnts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "sse4a_extrqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_EXTRQI))]
+ "TARGET_SSE4A"
+ "extrq\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_EXTRQ))]
+ "TARGET_SSE4A"
+ "extrq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" "")]
+ UNSPEC_INSERTQI))]
+ "TARGET_SSE4A"
+ "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_INSERTQ))]
+ "TARGET_SSE4A"
+ "insertq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.1 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "avx_blendp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (vec_merge:AVXMODEF2P
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
+ "TARGET_AVX"
+ "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:AVXMODEF2P 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse4_1_blendp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
+ "TARGET_SSE4_1"
+ "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "reg_not_xmm0_operand" "0")
+ (match_operand:SSEMODEF2P 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+ (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_AVX"
+ "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
+
+(define_insn "sse4_1_dpp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_SSE4_1"
+ "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_movntdqa"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
+ UNSPEC_MOVNTDQA))]
+ "TARGET_SSE4_1"
+ "%vmovntdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_AVX"
+ "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_SSE4_1"
+ "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "x"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_AVX"
+ "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE4_1"
+ "packusdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pblendvb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:V16QI 3 "register_operand" "x")]
+ UNSPEC_BLENDV))]
+ "TARGET_AVX"
+ "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendvb"
+ [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "reg_not_xmm0_operand" "0")
+ (match_operand:V16QI 2 "nonimm_not_xmm0_operand" "xm")
+ (match_operand:V16QI 3 "register_operand" "Yz")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "pblendvb\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_AVX"
+ "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_SSE4_1"
+ "pblendw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_phminposuw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PHMINPOSUW))]
+ "TARGET_SSE4_1"
+ "%vphminposuw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (vec_duplicate:V16QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (vec_duplicate:V16QI
+ (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (vec_duplicate:V16QI
+ (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (vec_duplicate:V8HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (vec_duplicate:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (vec_duplicate:V4SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovsxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (vec_duplicate:V16QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (vec_duplicate:V16QI
+ (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (vec_duplicate:V16QI
+ (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (vec_duplicate:V8HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (vec_duplicate:V8HI
+ (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (vec_duplicate:V4SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "%vpmovzxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;; vtestps/vtestpd are very similar to comiss and ucomiss in the way
+;; they set FLAGS_REG, but they are not really compare instructions.
+(define_insn "avx_vtestp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:AVXMODEF2P 0 "register_operand" "x")
+ (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_VTESTP))]
+ "TARGET_AVX"
+ "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+;; ptest is very similar to comiss and ucomiss in the way it sets
+;; FLAGS_REG, but it is not really a compare instruction.
+(define_insn "avx_ptest256"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
+ (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_AVX"
+ "vptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "sse4_1_ptest"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1"
+ "%vptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "avx_roundp<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (unspec:AVX256MODEF2P
+ [(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_AVX"
+ "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_roundp<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND))]
+ "TARGET_ROUND"
+ "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*avx_rounds<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (const_int 1)))]
+ "TARGET_AVX"
+ "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse4_1_rounds<ssemodesuffixf2c>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUND)
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_ROUND"
+ "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.2 string/text processing instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_and_split "sse4_2_pcmpestr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
+ (match_operand:SI 3 "register_operand" "a,a")
+ (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
+ (match_operand:SI 5 "register_operand" "d,d")
+ (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
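+ /* Inspect the REG_UNUSED notes to see which of the ecx, xmm0 and
+ FLAGS_REG results are live, and emit only the variants needed.  */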
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[5], operands[6]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
+ operands[2], operands[3],
+ operands[4], operands[5],
+ operands[6]));
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:SI 2 "register_operand" "a,a")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
+ (match_operand:SI 4 "register_operand" "d,d")
+ (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPESTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPEC_PCMPESTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpestr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
+ (match_operand:SI 3 "register_operand" "a,a,a,a")
+ (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 5 "register_operand" "d,d,d,d")
+ (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPESTR))
+ (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
+ (clobber (match_scratch:SI 1 "= X, X,c,c"))]
+ "TARGET_SSE4_2"
+ "@
+ %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
+ %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "sse4_2_pcmpistr"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
+ (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
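+ /* As for pcmpestr above, emit only the concrete forms whose results
+ are live according to the REG_UNUSED notes.  */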
+ int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+ int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
+ int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
+
+ if (ecx)
+ emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
+ operands[3], operands[4]));
+ if (xmm0)
+ emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
+ operands[3], operands[4]));
+ if (flags && !(ecx || xmm0))
+ emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
+ operands[2], operands[3],
+ operands[4]));
+ DONE;
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistri"
+ [(set (match_operand:SI 0 "register_operand" "=c,c")
+ (unspec:SI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistrm"
+ [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "register_operand" "x,x")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
+ (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
+ UNSPEC_PCMPISTR))
+ (set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMPISTR))]
+ "TARGET_SSE4_2"
+ "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_2_pcmpistr_cconly"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC
+ [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
+ (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
+ UNSPEC_PCMPISTR))
+ (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
+ (clobber (match_scratch:SI 1 "= X, X,c,c"))]
+ "TARGET_SSE4_2"
+ "@
+ %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
+ %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "memory" "none,load,none,load")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; SSE5 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; SSE5 parallel integer multiply/add instructions.
+;; Note that the instruction does not allow the value being added to be a
+;; memory operand.  However, pretending via the nonimmediate_operand
+;; predicate that it does, and splitting it later, allows the following to
+;; be recognized:
+;; a[i] = b[i] * c[i] + d[i];
+(define_insn "sse5_pmacsww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (plus:V8HI
+ (mult:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm")
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x"))
+ (match_operand:V8HI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
+ "@
+ pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Split pmacsww with two memory operands into a load and the pmacsww.
+(define_split
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" ""))
+ (match_operand:V8HI 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
+ && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
+ emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
+
+(define_insn "sse5_pmacssww"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
+ (ss_plus:V8HI
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V8HI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Note that the instruction does not allow the value being added to be a
+;; memory operand.  However, pretending via the nonimmediate_operand
+;; predicate that it does, and splitting it later, allows the following to
+;; be recognized:
+;; a[i] = b[i] * c[i] + d[i];
+(define_insn "sse5_pmacsdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (mult:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)"
+ "@
+ pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; Split pmacsdd with two memory operands into a load and the pmacsdd.
+(define_split
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (match_operand:V4SI 2 "nonimmediate_operand" ""))
+ (match_operand:V4SI 3 "nonimmediate_operand" "")))]
+ "TARGET_SSE5
+ && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
+ && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])
+ && !reg_mentioned_p (operands[0], operands[3])"
+ [(const_int 0)]
+{
+ ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
+ emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+})
+
+(define_insn "sse5_pmacssdd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x"))
+ (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmacssdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmacssdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (ss_plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmacsdql"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*sse5_pmacsdql_mem"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))])
+
+;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
+;; fake it with a multiply/add. In general, we expect the define_split to
+;; occur before register allocation, so we have to handle the corner case where
+;; the target is the same as operands[1] or operands[2].
+(define_insn_and_split "sse5_mulv2div2di3_low"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_SSE5"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmacsdqh"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*sse5_pmacsdqh_mem"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))])
+
+;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
+;; fake it with a multiply/add. In general, we expect the define_split to
+;; occur before register allocation, so we have to handle the corner case where
+;; the target is the same as either operands[1] or operands[2].
+(define_insn_and_split "sse5_mulv2div2di3_high"
+ [(set (match_operand:V2DI 0 "register_operand" "=&x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%x")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))))]
+ "TARGET_SSE5"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0)
+ (const_int 2)]))))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V2DImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; SSE5 parallel integer multiply/add instructions for the intrinsics
+(define_insn "sse5_pmacsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmacswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))
+ (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmadcsswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (ss_plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pmadcswd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
+ (plus:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))
+ (match_operand:V4SI 3 "register_operand" "0,0,0")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)"
+ "@
+ pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+;; SSE5 parallel XMM conditional moves
+(define_insn "sse5_pcmov_<mode>"
+ [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x")
+ (if_then_else:SSEMODE
+ (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x")
+ (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0")
+ (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "@
+ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
+ pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")])
+
+;; SSE5 horizontal add/subtract instructions
+(define_insn "sse5_phaddbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_SSE5"
+ "phaddbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddbd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_SSE5"
+ "phaddbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddbq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_SSE5"
+ "phaddbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_SSE5"
+ "phaddwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_SSE5"
+ "phaddwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phadddq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_SSE5"
+ "phadddq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddubw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (plus:V8HI
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_SSE5"
+ "phaddubw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddubd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)
+ (const_int 8)
+ (const_int 12)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)
+ (const_int 9)
+ (const_int 13)]))))
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)
+ (const_int 10)
+ (const_int 14)])))
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)
+ (const_int 11)
+ (const_int 15)]))))))]
+ "TARGET_SSE5"
+ "phaddubd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddubq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)])))))
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 8)
+ (const_int 12)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 9)
+ (const_int 13)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 10)
+ (const_int 14)])))
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_dup 1)
+ (parallel [(const_int 11)
+ (const_int 15)])))))))]
+ "TARGET_SSE5"
+ "phaddubq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phadduwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_SSE5"
+ "phadduwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phadduwq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 4)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 5)]))))
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 2)
+ (const_int 6)])))
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_dup 1)
+ (parallel [(const_int 3)
+ (const_int 7)]))))))]
+ "TARGET_SSE5"
+ "phadduwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phaddudq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (plus:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_SSE5"
+ "phaddudq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phsubbw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (minus:V8HI
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)
+ (const_int 8)
+ (const_int 10)
+ (const_int 12)
+ (const_int 14)])))
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)
+ (const_int 9)
+ (const_int 11)
+ (const_int 13)
+ (const_int 15)])))))]
+ "TARGET_SSE5"
+ "phsubbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phsubwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (minus:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))))]
+ "TARGET_SSE5"
+ "phsubwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+(define_insn "sse5_phsubdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (minus:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)])))))]
+ "TARGET_SSE5"
+ "phsubdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseiadd1")])
+
+;; SSE5 permute instructions
+(define_insn "sse5_pperm"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
+ (unspec:V16QI
+ [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm")
+ (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
+ UNSPEC_SSE5_PERMUTE))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; The following are for the various unpack insns which don't need the first
+;; source operand, so we can just use the output operand for the first operand.
+;; This allows either of the other two operands to be a memory operand.  We
+;; can't just use the first operand as an argument to the normal pperm because
+;; then an output-only argument suddenly becomes an input operand.
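+;; (A rough sketch, assuming the SSE5 pperm selector encoding: each byte of
+;; the selector in operand 3 picks one byte of the 32-byte concatenation of
+;; the two sources, with the high selector bits choosing zero or sign fill,
+;; so an unpack is simply a selector interleaving source bytes with fill.)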
+(define_insn "sse5_pperm_zero_v16qi_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
+ (match_operand 2 "" "")))) ;; parallel with const_int's
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
+ "TARGET_SSE5
+ && (register_operand (operands[1], V16QImode)
+ || register_operand (operands[2], V16QImode))"
+ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_sign_v16qi_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
+ (match_operand 2 "" "")))) ;; parallel with const_int's
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
+ "TARGET_SSE5
+ && (register_operand (operands[1], V16QImode)
+ || register_operand (operands[2], V16QImode))"
+ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_zero_v8hi_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
+ (match_operand 2 "" "")))) ;; parallel with const_int's
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
+ "TARGET_SSE5
+ && (register_operand (operands[1], V8HImode)
+ || register_operand (operands[2], V16QImode))"
+ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_sign_v8hi_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
+ (match_operand 2 "" "")))) ;; parallel with const_int's
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
+ "TARGET_SSE5
+ && (register_operand (operands[1], V8HImode)
+ || register_operand (operands[2], V16QImode))"
+ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_zero_v4si_v2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
+ (match_operand 2 "" "")))) ;; parallel with const_int's
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
+ "TARGET_SSE5
+ && (register_operand (operands[1], V4SImode)
+ || register_operand (operands[2], V16QImode))"
+ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_sign_v4si_v2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
+ (match_operand 2 "" "")))) ;; parallel with const_int's
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
+ "TARGET_SSE5
+ && (register_operand (operands[1], V4SImode)
+ || register_operand (operands[2], V16QImode))"
+ "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "TI")])
+
+;; SSE5 pack instructions that combine two vectors into a smaller vector
+(define_insn "sse5_pperm_pack_v2di_v4si"
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
+ (vec_concat:V4SI
+ (truncate:V2SI
+ (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm"))
+ (truncate:V2SI
+ (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_pack_v4si_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm"))
+ (truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_pperm_pack_v8hi_v16qi"
+ [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x")
+ (vec_concat:V16QI
+ (truncate:V8QI
+ (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm"))
+ (truncate:V8QI
+ (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x"))))
+ (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+;; Floating point permutation (permps, permpd)
+(define_insn "sse5_perm<mode>"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
+ (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
+ UNSPEC_SSE5_PERMUTE))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
+ "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "<MODE>")])
+
+;; SSE5 packed rotate instructions
+(define_expand "rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_SSE5"
+{
+ /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+ }
+})
+
+(define_expand "rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_SSE5"
+{
+ /* If we were given a scalar, convert it to a parallel.  */
+ if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (<ssescalarnum>);
+ rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != <ssescalarmode>mode)
+ {
+ op2 = gen_reg_rtx (<ssescalarmode>mode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < <ssescalarnum>; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_init<mode> (reg, par));
+ emit_insn (gen_neg<mode>2 (neg, reg));
+ emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+ }
+})
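+
+;; A variable right rotate is thus synthesized as a left rotate by the
+;; negated count: as the sse5_vrotl pattern below spells out, prot rotates
+;; left for positive per-element counts and right for negative ones.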
+
+(define_insn "sse5_rotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_SSE5"
+ "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_rotr<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (rotatert:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
+ "TARGET_SSE5"
+{
+ operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode)
+ - INTVAL (operands[2]));
+ return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_expand "vrotr<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ rtx reg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (reg, operands[2]));
+ emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "vrotl<mode>3"
+ [(match_operand:SSEMODE1248 0 "register_operand" "")
+ (match_operand:SSEMODE1248 1 "register_operand" "")
+ (match_operand:SSEMODE1248 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "sse5_vrotl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (rotate:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (rotatert:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
+ "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+;; SSE5 packed shift instructions.
+;; FIXME: add V2DI back in
+(define_expand "vlshr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashr<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ rtx neg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (neg, operands[2]));
+ emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg));
+ DONE;
+})
+
+(define_expand "vashl<mode>3"
+ [(match_operand:SSEMODE124 0 "register_operand" "")
+ (match_operand:SSEMODE124 1 "register_operand" "")
+ (match_operand:SSEMODE124 2 "register_operand" "")]
+ "TARGET_SSE5"
+{
+ emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "sse5_ashl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (ashiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
+ "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_lshl<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
+ (if_then_else:SSEMODE1248
+ (ge:SSEMODE1248
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x")
+ (const_int 0))
+ (ashift:SSEMODE1248
+ (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm")
+ (match_dup 2))
+ (lshiftrt:SSEMODE1248
+ (match_dup 1)
+ (neg:SSEMODE1248 (match_dup 2)))))]
+ "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)"
+ "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set_attr "mode" "TI")])
+
+;; SSE2 doesn't have some shift variants, so define versions for SSE5
+(define_expand "ashlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_SSE5"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "lshlv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_SSE5"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = operands[2];
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+ emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+(define_expand "ashrv16qi3"
+ [(match_operand:V16QI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")]
+ "TARGET_SSE5"
+{
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ int i;
+ rtx ele = ((GET_CODE (operands[2]) == CONST_INT)
+ ? GEN_INT (- INTVAL (operands[2]))
+ : operands[2]);
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = ele;
+
+ emit_insn (gen_vec_initv16qi (reg, par));
+
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx neg = gen_reg_rtx (V16QImode);
+ emit_insn (gen_negv16qi2 (neg, reg));
+ emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg));
+ }
+ else
+ emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg));
+
+ DONE;
+})
+
+(define_expand "ashrv2di3"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")]
+ "TARGET_SSE5"
+{
+ rtvec vs = rtvec_alloc (2);
+ rtx par = gen_rtx_PARALLEL (V2DImode, vs);
+ rtx reg = gen_reg_rtx (V2DImode);
+ rtx ele;
+
+ if (GET_CODE (operands[2]) == CONST_INT)
+ ele = GEN_INT (- INTVAL (operands[2]));
+ else if (GET_MODE (operands[2]) != DImode)
+ {
+ rtx move = gen_reg_rtx (DImode);
+ ele = gen_reg_rtx (DImode);
+ convert_move (move, operands[2], false);
+ emit_insn (gen_negdi2 (ele, move));
+ }
+ else
+ {
+ ele = gen_reg_rtx (DImode);
+ emit_insn (gen_negdi2 (ele, operands[2]));
+ }
+
+ RTVEC_ELT (vs, 0) = ele;
+ RTVEC_ELT (vs, 1) = ele;
+ emit_insn (gen_vec_initv2di (reg, par));
+ emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg));
+ DONE;
+})
+
+;; SSE5 FRCZ support
+;; parallel insns
+(define_insn "sse5_frcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ))]
+ "TARGET_SSE5"
+ "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
+;; scalar insns
+(define_insn "sse5_vmfrcz<mode>2"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
+ UNSPEC_FRCZ)
+ (match_operand:SSEMODEF2P 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE5"
+ "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecvt1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse5_cvtph2ps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_CVTPH2PS))]
+ "TARGET_SSE5"
+ "cvtph2ps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse5_cvtps2ph"
+ [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
+ (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
+ UNSPEC_CVTPS2PH))]
+ "TARGET_SSE5"
+ "cvtps2ph\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "mode" "V4SF")])
+
+;; Scalar versions of the com instructions, using vector types, that are
+;; called from the intrinsics.  Unlike the other s{s,d} instructions, the
+;; com instructions fill in 0's in the upper bits instead of leaving them
+;; unmodified, so we use a const_vector of 0 instead of match_dup.
+(define_expand "sse5_vmmaskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
+ [(match_operand:SSEMODEF2P 2 "register_operand" "")
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_SSE5"
+{
+ operands[4] = CONST0_RTX (<MODE>mode);
+})
+
+(define_insn "*sse5_vmmaskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (vec_merge:SSEMODEF2P
+ (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
+ (match_operand:SSEMODEF2P 4 "")
+ (const_int 1)))]
+ "TARGET_SSE5"
+ "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; We don't have a comparison operator that always returns true/false, so
+;; handle comfalse and comtrue specially.
+(define_insn "sse5_com_tf<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (unspec:SSEMODEF2P
+ [(match_operand:SSEMODEF2P 1 "register_operand" "x")
+ (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_SSE5_TRUEFALSE))]
+ "TARGET_SSE5"
+{
+ const char *ret = NULL;
+
+ switch (INTVAL (operands[3]))
+ {
+ case COM_FALSE_S:
+ ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
+ break;
+
+ case COM_FALSE_P:
+ ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
+ break;
+
+ case COM_TRUE_S:
+ ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
+ break;
+
+ case COM_TRUE_P:
+ ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return ret;
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse5_maskcmp<mode>3"
+ [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+ (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
+ [(match_operand:SSEMODEF2P 2 "register_operand" "x")
+ (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE5"
+ "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "sse5_maskcmp<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE5"
+ "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse4arg")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse5_maskcmp_uns<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
+ "TARGET_SSE5"
+ "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
+;; and pcomneu* not to be converted to the signed ones in case somebody needs
+;; the exact instruction generated for the intrinsic.
+(define_insn "sse5_maskcmp_uns2<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
+ [(match_operand:SSEMODE1248 2 "register_operand" "x")
+ (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
+ UNSPEC_SSE5_UNSIGNED_CMP))]
+ "TARGET_SSE5"
+ "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
+;; added here for completeness.
+(define_insn "sse5_pcom_tf<mode>3"
+ [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
+ (unspec:SSEMODE1248
+ [(match_operand:SSEMODE1248 1 "register_operand" "x")
+ (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_int_operand" "n")]
+ UNSPEC_SSE5_TRUEFALSE))]
+ "TARGET_SSE5"
+{
+ return ((INTVAL (operands[3]) != 0)
+ ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+ : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
+}
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesenc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENC))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesenc\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesenc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENC))]
+ "TARGET_AES"
+ "aesenc\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesenclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENCLAST))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesenclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESENCLAST))]
+ "TARGET_AES"
+ "aesenclast\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesdec"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDEC))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesdec\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesdec"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDEC))]
+ "TARGET_AES"
+ "aesdec\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*avx_aesdeclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDECLAST))]
+ "TARGET_AES && TARGET_AVX"
+ "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesdeclast"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
+ UNSPEC_AESDECLAST))]
+ "TARGET_AES"
+ "aesdeclast\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "aesimc"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_AESIMC))]
+ "TARGET_AES"
+ "%vaesimc\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "aeskeygenassist"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
+ UNSPEC_AESKEYGENASSIST))]
+ "TARGET_AES"
+ "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*vpclmulqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_PCLMUL))]
+ "TARGET_PCLMUL && TARGET_AVX"
+ "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "TI")])
+
+(define_insn "pclmulqdq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_PCLMUL))]
+ "TARGET_PCLMUL"
+ "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "avx_vzeroall"
+ [(match_par_dup 0 [(const_int 0)])]
+ "TARGET_AVX"
+{
+ int nregs = TARGET_64BIT ? 16 : 8;
+ int regno;
+
+ operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
+
+ XVECEXP (operands[0], 0, 0)
+ = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPECV_VZEROALL);
+
+ for (regno = 0; regno < nregs; regno++)
+ XVECEXP (operands[0], 0, regno + 1)
+ = gen_rtx_SET (VOIDmode,
+ gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
+ CONST0_RTX (V8SImode));
+})
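+
+;; The expander builds a PARALLEL containing the volatile VZEROALL unspec
+;; followed by one explicit (set (reg:V8SI xmmN) (const_vector 0)) per hard
+;; SSE register, so the dataflow passes see every vector register being
+;; clobbered rather than treating vzeroall as opaque.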
+
+(define_insn "*avx_vzeroall"
+ [(match_parallel 0 "vzeroall_operation"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)
+ (set (match_operand 1 "register_operand" "=x")
+ (match_operand 2 "const0_operand" "X"))])]
+ "TARGET_AVX"
+ "vzeroall"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+;; vzeroupper clobbers the upper 128 bits of the AVX registers.
+(define_insn "avx_vzeroupper"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
+ (clobber (reg:V8SI XMM0_REG))
+ (clobber (reg:V8SI XMM1_REG))
+ (clobber (reg:V8SI XMM2_REG))
+ (clobber (reg:V8SI XMM3_REG))
+ (clobber (reg:V8SI XMM4_REG))
+ (clobber (reg:V8SI XMM5_REG))
+ (clobber (reg:V8SI XMM6_REG))
+ (clobber (reg:V8SI XMM7_REG))]
+ "TARGET_AVX && !TARGET_64BIT"
+ "vzeroupper"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx_vzeroupper_rex64"
+ [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)
+ (clobber (reg:V8SI XMM0_REG))
+ (clobber (reg:V8SI XMM1_REG))
+ (clobber (reg:V8SI XMM2_REG))
+ (clobber (reg:V8SI XMM3_REG))
+ (clobber (reg:V8SI XMM4_REG))
+ (clobber (reg:V8SI XMM5_REG))
+ (clobber (reg:V8SI XMM6_REG))
+ (clobber (reg:V8SI XMM7_REG))
+ (clobber (reg:V8SI XMM8_REG))
+ (clobber (reg:V8SI XMM9_REG))
+ (clobber (reg:V8SI XMM10_REG))
+ (clobber (reg:V8SI XMM11_REG))
+ (clobber (reg:V8SI XMM12_REG))
+ (clobber (reg:V8SI XMM13_REG))
+ (clobber (reg:V8SI XMM14_REG))
+ (clobber (reg:V8SI XMM15_REG))]
+ "TARGET_AVX && TARGET_64BIT"
+ "vzeroupper"
+ [(set_attr "type" "sse")
+ (set_attr "memory" "none")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
+(define_insn "avx_vpermil<mode>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "xm")
+ (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")]
+ UNSPEC_VPERMIL))]
+ "TARGET_AVX"
+ "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_vpermilvar<mode>3"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
+ UNSPEC_VPERMIL))]
+ "TARGET_AVX"
+ "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_vperm2f128<mode>3"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
+ (unspec:AVX256MODE2P
+ [(match_operand:AVX256MODE2P 1 "register_operand" "x")
+ (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPERMIL2F128))]
+ "TARGET_AVX"
+ "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
+ (vec_concat:AVXMODEF4P
+ (vec_concat:<avxhalfvecmode>
+ (match_operand:<avxscalarmode> 1 "memory_operand" "m")
+ (match_dup 1))
+ (vec_concat:<avxhalfvecmode>
+ (match_dup 1)
+ (match_dup 1))))]
+ "TARGET_AVX"
+ "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxscalarmode>")])
+
+(define_insn "avx_vbroadcastss256"
+ [(set (match_operand:V8SF 0 "register_operand" "=x")
+ (vec_concat:V8SF
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (match_operand:SF 1 "memory_operand" "m")
+ (match_dup 1))
+ (vec_concat:V2SF
+ (match_dup 1)
+ (match_dup 1)))
+ (vec_concat:V4SF
+ (vec_concat:V2SF
+ (match_dup 1)
+ (match_dup 1))
+ (vec_concat:V2SF
+ (match_dup 1)
+ (match_dup 1)))))]
+ "TARGET_AVX"
+ "vbroadcastss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "SF")])
+
+(define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
+ [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODEF2P
+ (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
+ (match_dup 1)))]
+ "TARGET_AVX"
+ "vbroadcastf128\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "avx_vinsertf128<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand:AVX256MODE 1 "register_operand" "")
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_1_operand" "")]
+ "TARGET_AVX"
+{
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ emit_insn (gen_vec_set_lo_<mode> (operands[0], operands[1],
+ operands[2]));
+ break;
+ case 1:
+ emit_insn (gen_vec_set_hi_<mode> (operands[0], operands[1],
+ operands[2]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE4P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE4P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_<mode>"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_<mode>"
+ [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
+ (vec_concat:AVX256MODE8P
+ (vec_select:<avxhalfvecmode>
+ (match_operand:AVX256MODE8P 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v16hi"
+ [(set (match_operand:V16HI 0 "register_operand" "=x")
+ (vec_concat:V16HI
+ (vec_select:V8HI
+ (match_operand:V16HI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_lo_v32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 16) (const_int 17)
+ (const_int 18) (const_int 19)
+ (const_int 20) (const_int 21)
+ (const_int 22) (const_int 23)
+ (const_int 24) (const_int 25)
+ (const_int 26) (const_int 27)
+ (const_int 28) (const_int 29)
+ (const_int 30) (const_int 31)]))))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "vec_set_hi_v32qi"
+ [(set (match_operand:V32QI 0 "register_operand" "=x")
+ (vec_concat:V32QI
+ (vec_select:V16QI
+ (match_operand:V32QI 1 "register_operand" "x")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)
+ (const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_AVX"
+ "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V8SF")])
+
+(define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
+ (match_operand:AVXMODEF2P 2 "register_operand" "x")
+ (match_dup 0)]
+ UNSPEC_MASKLOAD))]
+ "TARGET_AVX"
+ "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
+ [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
+ (unspec:AVXMODEF2P
+ [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+ (match_operand:AVXMODEF2P 2 "register_operand" "x")
+ (match_dup 0)]
+ UNSPEC_MASKSTORE))]
+ "TARGET_AVX"
+ "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
+ [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
+ (unspec:AVX256MODE2P
+ [(match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "0,xm")]
+ UNSPEC_CAST))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%1, %x0|%x0, %1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%1, %x0|%x0, %1}";
+ case MODE_OI:
+ return "vmovdqa\t{%1, %x0|%x0, %1}";
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+ gcc_unreachable ();
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+ (const_string "0")
+ (const_string "*")))])
+
+(define_insn "avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>"
+ [(set (match_operand:<avxhalfvecmode> 0 "register_operand" "=x,x")
+ (unspec:<avxhalfvecmode>
+ [(match_operand:AVX256MODE2P 1 "nonimmediate_operand" "0,xm")]
+ UNSPEC_CAST))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%x1, %0|%0, %x1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%x1, %0|%0, %x1}";
+ case MODE_OI:
+ return "vmovdqa\t{%x1, %0|%0, %x1}";
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+ gcc_unreachable ();
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")
+ (set (attr "length")
+ (if_then_else (eq_attr "alternative" "0")
+ (const_string "0")
+ (const_string "*")))])
+
+(define_expand "vec_init<mode>"
+ [(match_operand:AVX256MODE 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_AVX"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "*vec_concat<mode>_avx"
+ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
+ (vec_concat:AVX256MODE
+ (match_operand:<avxhalfvecmode> 1 "register_operand" "x,x")
+ (match_operand:<avxhalfvecmode> 2 "vector_move_operand" "xm,C")))]
+ "TARGET_AVX"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
+ case 1:
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8SF:
+ return "vmovaps\t{%1, %x0|%x0, %1}";
+ case MODE_V4DF:
+ return "vmovapd\t{%1, %x0|%x0, %1}";
+ default:
+ return "vmovdqa\t{%1, %x0|%x0, %1}";
+ }
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "sselog,ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<avxvecmode>")])
diff --git a/gcc-4.4.0/gcc/config/i386/sync.md b/gcc-4.4.0/gcc/config/i386/sync.md
new file mode 100644
index 000000000..05aad00ba
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sync.md
@@ -0,0 +1,347 @@
+;; GCC machine description for i386 synchronization instructions.
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009
+;; Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_iterator IMODE [QI HI SI (DI "TARGET_64BIT")])
+(define_mode_attr modesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+(define_mode_attr modeconstraint [(QI "q") (HI "r") (SI "r") (DI "r")])
+(define_mode_attr immconstraint [(QI "i") (HI "i") (SI "i") (DI "e")])
+
+(define_mode_iterator CASMODE [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B")
+ (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_iterator DCASMODE
+ [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic")
+ (TI "TARGET_64BIT && TARGET_CMPXCHG16B")])
+(define_mode_attr doublemodesuffix [(DI "8") (TI "16")])
+(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")])
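+
+;; Illustrative note (not part of the original file): GCC replicates
+;; each <mode> pattern below once per iterator entry, so e.g.
+;; "sync_old_add<mode>" yields sync_old_addqi/hi/si (plus sync_old_adddi
+;; on 64-bit targets), with <modesuffix> selecting the b/w/l/q opcode
+;; suffix and <modeconstraint> the matching register class.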
+
+(define_expand "memory_barrier"
+ [(set (match_dup 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
+ ""
+{
+ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (operands[0]) = 1;
+
+ if (!(TARGET_64BIT || TARGET_SSE2))
+ {
+ emit_insn (gen_memory_barrier_nosse (operands[0]));
+ DONE;
+ }
+})
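+
+;; Illustrative note (not part of the original file): this expander
+;; serves the __sync_synchronize builtin -- an SSE2 fence where
+;; available, otherwise the locked or-to-stack fallback below.  A
+;; minimal C sketch with illustrative variables:
+;;
+;;   data = 42;
+;;   __sync_synchronize ();   /* full memory barrier */
+;;   ready = 1;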
+
+(define_insn "memory_barrier_nosse"
+ [(set (match_operand:BLK 0 "" "")
+ (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))
+ (clobber (reg:CC FLAGS_REG))]
+ "!(TARGET_64BIT || TARGET_SSE2)"
+ "lock{%;| }or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
+ [(set_attr "memory" "unknown")])
+
+;; ??? It would be possible to use cmpxchg8b on pentium for DImode
+;; changes. It's complicated because the insn uses ecx:ebx as the
+;; new value; note that the registers are reversed from the order
+;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode
+;; data in 64-bit mode.
+
+(define_expand "sync_compare_and_swap<mode>"
+ [(parallel
+ [(set (match_operand:CASMODE 0 "register_operand" "")
+ (match_operand:CASMODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:CASMODE
+ [(match_dup 1)
+ (match_operand:CASMODE 2 "register_operand" "")
+ (match_operand:CASMODE 3 "register_operand" "")]
+ UNSPECV_CMPXCHG))
+ (clobber (reg:CC FLAGS_REG))])]
+ "TARGET_CMPXCHG"
+{
+ if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+ {
+ enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
+ rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
+ rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
+ GET_MODE_SIZE (hmode));
+ low = force_reg (hmode, low);
+ high = force_reg (hmode, high);
+ if (<MODE>mode == DImode)
+ {
+ if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode))
+ operands[1] = replace_equiv_address (operands[1],
+ force_reg (Pmode,
+ XEXP (operands[1],
+ 0)));
+ emit_insn (gen_sync_double_compare_and_swapdi
+ (operands[0], operands[1], operands[2], low, high));
+ }
+ else if (<MODE>mode == TImode)
+ emit_insn (gen_sync_double_compare_and_swapti
+ (operands[0], operands[1], operands[2], low, high));
+ else
+ gcc_unreachable ();
+ DONE;
+ }
+})
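+
+;; Illustrative note (not part of the original file): this expander
+;; implements the standard "sync_compare_and_swap" name, i.e. the
+;; __sync_val_compare_and_swap builtin.  A minimal C sketch:
+;;
+;;   static int lock;
+;;   int try_acquire (void)
+;;   {
+;;     /* Returns the old value; the acquire succeeded iff it was 0.  */
+;;     return __sync_val_compare_and_swap (&lock, 0, 1);
+;;   }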
+
+(define_insn "*sync_compare_and_swap<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=a")
+ (match_operand:IMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:IMODE
+ [(match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "a")
+ (match_operand:IMODE 3 "register_operand" "<modeconstraint>")]
+ UNSPECV_CMPXCHG))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_CMPXCHG"
+ "lock{%;| }cmpxchg{<modesuffix>}\t{%3, %1|%1, %3}")
+
+(define_insn "sync_double_compare_and_swap<mode>"
+ [(set (match_operand:DCASMODE 0 "register_operand" "=A")
+ (match_operand:DCASMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DCASMODE
+ [(match_dup 1)
+ (match_operand:DCASMODE 2 "register_operand" "A")
+ (match_operand:<DCASHMODE> 3 "register_operand" "b")
+ (match_operand:<DCASHMODE> 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock{%;| }cmpxchg<doublemodesuffix>b\t%1")
+
+;; Theoretically we'd like to use constraint "r" (any reg) for operand
+;; 3, but that includes ecx.  If operands 3 and 4 are the same (like when
+;; the input is -1LL) GCC might choose to allocate operand 3 to ecx, like
+;; operand 4.  This breaks, as the xchg will move the PIC register contents
+;; to %ecx then --> boom.  Operands 3 and 4 really need to be different
+;; registers, which in this case means operand 3 must not be ecx.
+;; Instead of playing tricks with fake early clobbers or the like we
+;; just enumerate all regs possible here, which (as this is !TARGET_64BIT)
+;; are just esi and edi.
+(define_insn "*sync_double_compare_and_swapdi_pic"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DI
+ [(match_dup 1)
+ (match_operand:DI 2 "register_operand" "A")
+ (match_operand:SI 3 "register_operand" "SD")
+ (match_operand:SI 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG))
+ (clobber (reg:CC FLAGS_REG))]
+ "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
+ "xchg{l}\t%%ebx, %3\;lock{%;| }cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+
+(define_expand "sync_compare_and_swap_cc<mode>"
+ [(parallel
+ [(set (match_operand:CASMODE 0 "register_operand" "")
+ (match_operand:CASMODE 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec_volatile:CASMODE
+ [(match_dup 1)
+ (match_operand:CASMODE 2 "register_operand" "")
+ (match_operand:CASMODE 3 "register_operand" "")]
+ UNSPECV_CMPXCHG))
+ (set (match_dup 4)
+ (compare:CCZ
+ (unspec_volatile:CASMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
+ (match_dup 2)))])]
+ "TARGET_CMPXCHG"
+{
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ ix86_compare_op0 = operands[3];
+ ix86_compare_op1 = NULL;
+ ix86_compare_emitted = operands[4];
+ if ((<MODE>mode == DImode && !TARGET_64BIT) || <MODE>mode == TImode)
+ {
+ enum machine_mode hmode = <MODE>mode == DImode ? SImode : DImode;
+ rtx low = simplify_gen_subreg (hmode, operands[3], <MODE>mode, 0);
+ rtx high = simplify_gen_subreg (hmode, operands[3], <MODE>mode,
+ GET_MODE_SIZE (hmode));
+ low = force_reg (hmode, low);
+ high = force_reg (hmode, high);
+ if (<MODE>mode == DImode)
+ {
+ if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode))
+ operands[1] = replace_equiv_address (operands[1],
+ force_reg (Pmode,
+ XEXP (operands[1],
+ 0)));
+ emit_insn (gen_sync_double_compare_and_swap_ccdi
+ (operands[0], operands[1], operands[2], low, high));
+ }
+ else if (<MODE>mode == TImode)
+ emit_insn (gen_sync_double_compare_and_swap_ccti
+ (operands[0], operands[1], operands[2], low, high));
+ else
+ gcc_unreachable ();
+ DONE;
+ }
+})
+
+(define_insn "*sync_compare_and_swap_cc<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=a")
+ (match_operand:IMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:IMODE
+ [(match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "a")
+ (match_operand:IMODE 3 "register_operand" "<modeconstraint>")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:IMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG)
+ (match_dup 2)))]
+ "TARGET_CMPXCHG"
+ "lock{%;| }cmpxchg{<modesuffix>}\t{%3, %1|%1, %3}")
+
+(define_insn "sync_double_compare_and_swap_cc<mode>"
+ [(set (match_operand:DCASMODE 0 "register_operand" "=A")
+ (match_operand:DCASMODE 1 "memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DCASMODE
+ [(match_dup 1)
+ (match_operand:DCASMODE 2 "register_operand" "A")
+ (match_operand:<DCASHMODE> 3 "register_operand" "b")
+ (match_operand:<DCASHMODE> 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:DCASMODE
+ [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
+ UNSPECV_CMPXCHG)
+ (match_dup 2)))]
+ ""
+ "lock{%;| }cmpxchg<doublemodesuffix>b\t%1")
+
+;; See above for the explanation of using the constraint "SD" for
+;; operand 3.
+(define_insn "*sync_double_compare_and_swap_ccdi_pic"
+ [(set (match_operand:DI 0 "register_operand" "=A")
+ (match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m"))
+ (set (match_dup 1)
+ (unspec_volatile:DI
+ [(match_dup 1)
+ (match_operand:DI 2 "register_operand" "A")
+ (match_operand:SI 3 "register_operand" "SD")
+ (match_operand:SI 4 "register_operand" "c")]
+ UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:DI
+ [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)]
+ UNSPECV_CMPXCHG)
+ (match_dup 2)))]
+ "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
+ "xchg{l}\t%%ebx, %3\;lock{%;| }cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
+
+(define_insn "sync_old_add<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=<modeconstraint>")
+ (unspec_volatile:IMODE
+ [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG))
+ (set (match_dup 1)
+ (plus:IMODE (match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "0")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_XADD"
+ "lock{%;| }xadd{<modesuffix>}\t{%0, %1|%1, %0}")
+
+;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
+(define_insn "sync_lock_test_and_set<mode>"
+ [(set (match_operand:IMODE 0 "register_operand" "=<modeconstraint>")
+ (unspec_volatile:IMODE
+ [(match_operand:IMODE 1 "memory_operand" "+m")] UNSPECV_XCHG))
+ (set (match_dup 1)
+ (match_operand:IMODE 2 "register_operand" "0"))]
+ ""
+ "xchg{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_add<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(plus:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_USE_INCDEC)
+ {
+ if (operands[1] == const1_rtx)
+ return "lock{%;| }inc{<modesuffix>}\t%0";
+ if (operands[1] == constm1_rtx)
+ return "lock{%;| }dec{<modesuffix>}\t%0";
+ }
+
+ return "lock{%;| }add{<modesuffix>}\t{%1, %0|%0, %1}";
+})
+
+(define_insn "sync_sub<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(minus:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+{
+ if (TARGET_USE_INCDEC)
+ {
+ if (operands[1] == const1_rtx)
+ return "lock{%;| }dec{<modesuffix>}\t%0";
+ if (operands[1] == constm1_rtx)
+ return "lock{%;| }inc{<modesuffix>}\t%0";
+ }
+
+ return "lock{%;| }sub{<modesuffix>}\t{%1, %0|%0, %1}";
+})
+
+(define_insn "sync_ior<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(ior:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock{%;| }or{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_and<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(and:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock{%;| }and{<modesuffix>}\t{%1, %0|%0, %1}")
+
+(define_insn "sync_xor<mode>"
+ [(set (match_operand:IMODE 0 "memory_operand" "+m")
+ (unspec_volatile:IMODE
+ [(xor:IMODE (match_dup 0)
+ (match_operand:IMODE 1 "nonmemory_operand" "<modeconstraint><immconstraint>"))]
+ UNSPECV_LOCK))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "lock{%;| }xor{<modesuffix>}\t{%1, %0|%0, %1}")
diff --git a/gcc-4.4.0/gcc/config/i386/sysv4.h b/gcc-4.4.0/gcc/config/i386/sysv4.h
new file mode 100644
index 000000000..bedac7a58
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/sysv4.h
@@ -0,0 +1,132 @@
+/* Target definitions for GCC for Intel 80386 running System V.4
+ Copyright (C) 1991, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+
+ Written by Ron Guilmette (rfg@netcom.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_VERSION fprintf (stderr, " (i386 System V Release 4)");
+
+/* The svr4 ABI for the i386 says that records and unions are returned
+ in memory. */
+
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ (TYPE_MODE (TYPE) == BLKmode \
+ || (VECTOR_MODE_P (TYPE_MODE (TYPE)) && int_size_in_bytes (TYPE) == 8));
+
+/* Output at beginning of assembler file. */
+/* The .file command should always begin the output. */
+
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+#undef X86_FILE_START_VERSION_DIRECTIVE
+#define X86_FILE_START_VERSION_DIRECTIVE true
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) svr4_dbx_register_map[n]
+
+/* The routine used to output sequences of byte values. We use a special
+ version of this for most svr4 targets because doing so makes the
+ generated assembly code more compact (and thus faster to assemble)
+ as well as more readable. Note that if we find subparts of the
+ character sequence which end with NUL (and which are shorter than
+ STRING_LIMIT) we output those using ASM_OUTPUT_LIMITED_STRING. */
+
+#undef ASM_OUTPUT_ASCII
+#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
+ do \
+ { \
+ const unsigned char *_ascii_bytes = \
+ (const unsigned char *) (STR); \
+ const unsigned char *limit = _ascii_bytes + (LENGTH); \
+ unsigned bytes_in_chunk = 0; \
+ for (; _ascii_bytes < limit; _ascii_bytes++) \
+ { \
+ const unsigned char *p; \
+ if (bytes_in_chunk >= 64) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ for (p = _ascii_bytes; p < limit && *p != '\0'; p++) \
+ continue; \
+ if (p < limit && (p - _ascii_bytes) <= (long) STRING_LIMIT) \
+ { \
+ if (bytes_in_chunk > 0) \
+ { \
+ fputc ('\n', (FILE)); \
+ bytes_in_chunk = 0; \
+ } \
+ ASM_OUTPUT_LIMITED_STRING ((FILE), _ascii_bytes); \
+ _ascii_bytes = p; \
+ } \
+ else \
+ { \
+ if (bytes_in_chunk == 0) \
+ fprintf ((FILE), "\t.byte\t"); \
+ else \
+ fputc (',', (FILE)); \
+ fprintf ((FILE), "0x%02x", *_ascii_bytes); \
+ bytes_in_chunk += 5; \
+ } \
+ } \
+ if (bytes_in_chunk > 0) \
+ fprintf ((FILE), "\n"); \
+ } \
+ while (0)
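+
+/* Illustrative note (not part of the original header): for the input
+   bytes "ab\0xyz" the macro above would emit roughly
+
+       .string "ab"
+       .byte   0x78,0x79,0x7a
+
+   -- the NUL-terminated prefix through ASM_OUTPUT_LIMITED_STRING, the
+   unterminated remainder as comma-separated .byte chunks.  */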
+
+/* A C statement (sans semicolon) to output to the stdio stream
+ FILE the assembler definition of uninitialized global DECL named
+ NAME whose size is SIZE bytes and alignment is ALIGN bytes.
+ Try to use asm_output_aligned_bss to implement this macro. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if ((SIZE) == 4 && ((ENCODING) & 0x70) == DW_EH_PE_datarel) \
+ { \
+ fputs (ASM_LONG, FILE); \
+ assemble_name (FILE, XSTR (ADDR, 0)); \
+ fputs (((ENCODING) & DW_EH_PE_indirect ? "@GOT" : "@GOTOFF"), FILE); \
+ goto DONE; \
+ } \
+ } while (0)
+
+/* Used by crtstuff.c to initialize the base of data-relative relocations.
+ These are GOT relative on x86, so return the pic register. */
+#ifdef __PIC__
+#define CRT_GET_RFIB_DATA(BASE) \
+ { \
+ register void *ebx_ __asm__("ebx"); \
+ BASE = ebx_; \
+ }
+#else
+#define CRT_GET_RFIB_DATA(BASE) \
+ __asm__ ("call\t.LPR%=\n" \
+ ".LPR%=:\n\t" \
+ "pop{l}\t%0\n\t" \
+ /* Due to a GAS bug, this cannot use EAX. That encodes \
+ smaller than the traditional EBX, which results in the \
+ offset being off by one. */ \
+ "add{l}\t{$_GLOBAL_OFFSET_TABLE_+[.-.LPR%=],%0" \
+ "|%0,_GLOBAL_OFFSET_TABLE_+(.-.LPR%=)}" \
+ : "=d"(BASE))
+#endif
diff --git a/gcc-4.4.0/gcc/config/i386/t-crtfm b/gcc-4.4.0/gcc/config/i386/t-crtfm
new file mode 100644
index 000000000..4fa27e91c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-crtfm
@@ -0,0 +1,8 @@
+EXTRA_PARTS += crtfastmath.o
+
+$(T)crtfastmath.o: $(srcdir)/config/i386/crtfastmath.c \
+ $(srcdir)/config/i386/cpuid.h $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) \
+ -msse -minline-all-stringops -c \
+ $(srcdir)/config/i386/crtfastmath.c \
+ -o $(T)crtfastmath$(objext)
diff --git a/gcc-4.4.0/gcc/config/i386/t-crtpc b/gcc-4.4.0/gcc/config/i386/t-crtpc
new file mode 100644
index 000000000..bc24bedf3
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-crtpc
@@ -0,0 +1,16 @@
+EXTRA_PARTS += crtprec32.o crtprec64.o crtprec80.o
+
+$(T)crtprec32.o: $(srcdir)/config/i386/crtprec.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -D__PREC=32 -c \
+ $(srcdir)/config/i386/crtprec.c \
+ -o $(T)crtprec32$(objext)
+
+$(T)crtprec64.o: $(srcdir)/config/i386/crtprec.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -D__PREC=64 -c \
+ $(srcdir)/config/i386/crtprec.c \
+ -o $(T)crtprec64$(objext)
+
+$(T)crtprec80.o: $(srcdir)/config/i386/crtprec.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) $(MULTILIB_CFLAGS) -D__PREC=80 -c \
+ $(srcdir)/config/i386/crtprec.c \
+ -o $(T)crtprec80$(objext)
diff --git a/gcc-4.4.0/gcc/config/i386/t-crtpic b/gcc-4.4.0/gcc/config/i386/t-crtpic
new file mode 100644
index 000000000..ff81a9bef
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-crtpic
@@ -0,0 +1,10 @@
+# The pushl in CTOR initialization interferes with frame pointer elimination.
+
+# We need to use -fPIC when we are using gcc to compile the routines in
+# crtstuff.c. This is only really needed when we are going to use gcc/g++
+# to produce a shared library, but since we don't know ahead of time when
+# we will be doing that, we just always use -fPIC when compiling the
+# routines in crtstuff.c.
+
+CRTSTUFF_T_CFLAGS = -fPIC -fno-omit-frame-pointer
+TARGET_LIBGCC2_CFLAGS = -fPIC
diff --git a/gcc-4.4.0/gcc/config/i386/t-crtstuff b/gcc-4.4.0/gcc/config/i386/t-crtstuff
new file mode 100644
index 000000000..c14dd9411
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-crtstuff
@@ -0,0 +1,7 @@
+# The pushl in CTOR initialization interferes with frame pointer elimination.
+# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,
+# because otherwise __FRAME_END__ might not be the last thing in the
+# .eh_frame section.  -fasynchronous-unwind-tables is off by default for
+# i386 and on by default for x86-64, so we turn it off explicitly for
+# both i386 and x86-64.
+CRTSTUFF_T_CFLAGS += -fno-omit-frame-pointer -fno-asynchronous-unwind-tables
diff --git a/gcc-4.4.0/gcc/config/i386/t-cygming b/gcc-4.4.0/gcc/config/i386/t-cygming
new file mode 100644
index 000000000..9e441a79a
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-cygming
@@ -0,0 +1,82 @@
+# cygwin and mingw always have a limits.h, but, depending upon how we are
+# doing the build, it may not be installed yet.
+LIMITS_H_TEST = true
+
+# If we are building next to winsup, this will let us find the real
+# limits.h when building libgcc2. Otherwise, winsup must be installed
+# first.
+LIBGCC2_INCLUDES = -I$(srcdir)/../winsup/w32api/include
+
+winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt.c
+
+winnt-cxx.o: $(srcdir)/config/i386/winnt-cxx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt-cxx.c
+
+
+winnt-stubs.o: $(srcdir)/config/i386/winnt-stubs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/winnt-stubs.c
+
+msformat-c.o: $(srcdir)/config/i386/msformat-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/msformat-c.c
+
+STMP_FIXINC=stmp-fixinc
+
+# Build a shared libgcc library for PECOFF with a DEF file
+# with the GNU linker.
+#
+# mkmap-flat.awk is used with the pe_dll option to produce a DEF instead
+# of an ELF map file.
+#
+# Warning: If SHLIB_SOVERSION or SHLIB_SONAME are updated, LIBGCC_SONAME
+# in mingw32.h and SHLIB_MKMAP_OPTS below must be updated also.
+
+SHLIB_EXT = .dll
+SHLIB_IMPLIB = @shlib_base_name@.a
+SHLIB_SOVERSION = 1
+SHLIB_SONAME = @shlib_base_name@_$(EH_MODEL)-$(SHLIB_SOVERSION)$(SHLIB_EXT)
+SHLIB_MAP = @shlib_map_file@
+SHLIB_OBJS = @shlib_objs@
+SHLIB_DIR = @multilib_dir@/shlib
+SHLIB_SLIBDIR_QUAL = @shlib_slibdir_qual@
+
+SHLIB_LINK = $(LN_S) -f $(SHLIB_MAP) $(SHLIB_MAP).def && \
+ if [ ! -d $(SHLIB_DIR) ]; then \
+ mkdir $(SHLIB_DIR); \
+ else true; fi && \
+ $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ $(SHLIB_MAP).def \
+ -Wl,--out-implib,$(SHLIB_DIR)/$(SHLIB_IMPLIB).tmp \
+ -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
+ $(SHLIB_OBJS) $(SHLIB_LC) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $(SHLIB_DIR)/$(SHLIB_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_SONAME).tmp $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ mv $(SHLIB_DIR)/$(SHLIB_IMPLIB).tmp $(SHLIB_DIR)/$(SHLIB_IMPLIB)
+# $(slibdir) double quoted to protect it from expansion while building
+# libgcc.mk. We want this delayed until actual install time.
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_PROGRAM) $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $$(DESTDIR)$$(bindir)/$(SHLIB_SONAME); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_IMPLIB) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_IMPLIB)
+SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
+# We'd like to use SHLIB_SONAME here too, but shlib_base_name
+# does not get substituted before mkmap-flat.awk is run.
+SHLIB_MKMAP_OPTS = -v pe_dll=libgcc_s_$(EH_MODEL)-$(SHLIB_SOVERSION)$(SHLIB_EXT)
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc-4.4.0/gcc/config/i386/t-cygwin b/gcc-4.4.0/gcc/config/i386/t-cygwin
new file mode 100644
index 000000000..d58d84fcd
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-cygwin
@@ -0,0 +1,30 @@
+# If we are building next to winsup, this will let us find the real
+# limits.h when building libgcc2. Otherwise, winsup must be installed
+# first.
+LIBGCC2_INCLUDES += -I$(srcdir)/../winsup/include \
+ -I$(srcdir)/../winsup/cygwin/include
+
+cygwin1.o: $(srcdir)/config/i386/cygwin1.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(TM_P_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/cygwin1.c
+
+cygwin2.o: $(srcdir)/config/i386/cygwin2.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(TM_P_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/cygwin2.c
+
+# Cygwin-specific parts of LIB_SPEC
+SHLIB_LC = -lcygwin -luser32 -lkernel32 -ladvapi32 -lshell32
+
+# We have already included one of the t-{dw2,sjlj}-eh fragments for EH_MODEL
+SHLIB_EH_EXTENSION = $(subst -dw2,,-$(EH_MODEL))
+
+# Cygwin uses different conventions than MinGW; override generic SHLIB_ def'ns here.
+SHLIB_IMPLIB = @shlib_base_name@$(SHLIB_EXT).a
+SHLIB_SONAME = cyggcc_s$(SHLIB_EH_EXTENSION)-$(SHLIB_SOVERSION)$(SHLIB_EXT)
+# This must match the definitions of SHLIB_SONAME/SHLIB_SOVERSION and LIBGCC_SONAME.
+# We'd like to use SHLIB_SONAME here too, and we can, since
+# we don't rely on shlib_base_name substitution for it.
+SHLIB_MKMAP_OPTS = -v pe_dll=$(SHLIB_SONAME)
+
diff --git a/gcc-4.4.0/gcc/config/i386/t-darwin b/gcc-4.4.0/gcc/config/i386/t-darwin
new file mode 100644
index 000000000..fb5bbe78c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-darwin
@@ -0,0 +1,4 @@
+MULTILIB_OPTIONS = m64
+MULTILIB_DIRNAMES = x86_64
+LIB2_SIDITI_CONV_FUNCS=yes
+LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c
diff --git a/gcc-4.4.0/gcc/config/i386/t-darwin64 b/gcc-4.4.0/gcc/config/i386/t-darwin64
new file mode 100644
index 000000000..81b4565ac
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-darwin64
@@ -0,0 +1,8 @@
+LIB2_SIDITI_CONV_FUNCS=yes
+LIB2FUNCS_EXTRA = $(srcdir)/config/darwin-64.c
+
+MULTILIB_OPTIONS = m32
+MULTILIB_DIRNAMES = i386
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc-4.4.0/gcc/config/i386/t-djgpp b/gcc-4.4.0/gcc/config/i386/t-djgpp
new file mode 100644
index 000000000..7b54b7ba7
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-djgpp
@@ -0,0 +1,2 @@
+# Location of DJGPP's header directory.
+NATIVE_SYSTEM_HEADER_DIR=$(DJDIR)/include
diff --git a/gcc-4.4.0/gcc/config/i386/t-dw2-eh b/gcc-4.4.0/gcc/config/i386/t-dw2-eh
new file mode 100644
index 000000000..ffcc39aea
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-dw2-eh
@@ -0,0 +1,3 @@
+
+# We are using Dwarf-2 EH.
+EH_MODEL = dw2
diff --git a/gcc-4.4.0/gcc/config/i386/t-fprules-softfp b/gcc-4.4.0/gcc/config/i386/t-fprules-softfp
new file mode 100644
index 000000000..0b0068f90
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-fprules-softfp
@@ -0,0 +1,6 @@
+softfp_float_modes := tf
+softfp_int_modes := si di ti
+softfp_extensions := sftf dftf xftf
+softfp_truncations := tfsf tfdf tfxf
+softfp_machine_header := i386/sfp-machine.h
+softfp_exclude_libgcc2 := n
diff --git a/gcc-4.4.0/gcc/config/i386/t-gmm_malloc b/gcc-4.4.0/gcc/config/i386/t-gmm_malloc
new file mode 100644
index 000000000..c37f8a759
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-gmm_malloc
@@ -0,0 +1,6 @@
+# Install gmm_malloc.h as mm_malloc.h.
+
+EXTRA_HEADERS += mm_malloc.h
+mm_malloc.h: $(srcdir)/config/i386/gmm_malloc.h
+ rm -f $@
+ cat $^ > $@
diff --git a/gcc-4.4.0/gcc/config/i386/t-gthr-win32 b/gcc-4.4.0/gcc/config/i386/t-gthr-win32
new file mode 100644
index 000000000..204a485bc
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-gthr-win32
@@ -0,0 +1,3 @@
+# We hide calls to w32api needed for w32 thread support here:
+LIB2FUNCS_EXTRA = $(srcdir)/config/i386/gthr-win32.c
+
diff --git a/gcc-4.4.0/gcc/config/i386/t-i386 b/gcc-4.4.0/gcc/config/i386/t-i386
new file mode 100644
index 000000000..b7dd8edfa
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-i386
@@ -0,0 +1,16 @@
+LIB1ASMSRC = i386/chkstk.asm
+LIB1ASMFUNCS = _chkstk
+
+i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) $(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h \
+ $(REAL_H) insn-config.h conditions.h output.h insn-codes.h \
+ $(INSN_ATTR_H) $(FLAGS_H) $(C_COMMON_H) except.h $(FUNCTION_H) \
+ $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \
+ $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \
+ $(TREE_GIMPLE_H) dwarf2.h $(DF_H) tm-constrs.h $(PARAMS_H)
+
+i386-c.o: $(srcdir)/config/i386/i386-c.c \
+ $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(FLAGS_H) $(C_COMMON_H) $(GGC_H) \
+ $(TARGET_H) $(TARGET_DEF_H) $(CPPLIB_H) $(C_PRAGMA_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/i386-c.c
diff --git a/gcc-4.4.0/gcc/config/i386/t-i386elf b/gcc-4.4.0/gcc/config/i386/t-i386elf
new file mode 100644
index 000000000..9560d9055
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-i386elf
@@ -0,0 +1,4 @@
+# For svr4 we build crtbegin.o and crtend.o which serve to add begin and
+# end labels to the .ctors and .dtors section when we link using gcc.
+
+EXTRA_PARTS=crtbegin.o crtend.o
diff --git a/gcc-4.4.0/gcc/config/i386/t-interix b/gcc-4.4.0/gcc/config/i386/t-interix
new file mode 100644
index 000000000..09bfe1f21
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-interix
@@ -0,0 +1,4 @@
+winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
+ $(TM_P_H) toplev.h $(HASHTAB_H) $(GGC_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/winnt.c
diff --git a/gcc-4.4.0/gcc/config/i386/t-linux b/gcc-4.4.0/gcc/config/i386/t-linux
new file mode 100644
index 000000000..4c6bb51e3
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-linux
@@ -0,0 +1,5 @@
+# On 64-bit we do not need any exports for glibc for the 64-bit libgcc_s.
+# We need to support TImode for x86, so override the settings from
+# t-slibgcc-elf-ver and t-linux.
+SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver \
+ $(srcdir)/config/i386/libgcc-glibc.ver
diff --git a/gcc-4.4.0/gcc/config/i386/t-linux64 b/gcc-4.4.0/gcc/config/i386/t-linux64
new file mode 100644
index 000000000..36378d87e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-linux64
@@ -0,0 +1,17 @@
+# On Debian, Ubuntu and other derivative distributions, the 32-bit libraries
+# are found in /lib32 and /usr/lib32, and /lib64 and /usr/lib64 are symlinks
+# to /lib and /usr/lib, while other distributions install libraries into
+# /lib64 and /usr/lib64.  The LSB does not mandate the use of /lib64 and
+# /usr/lib64, and it says nothing about the 32-bit libraries on those
+# systems.  Set MULTILIB_OSDIRNAMES according to what is found on the
+# target.
+
+MULTILIB_OPTIONS = m64/m32
+MULTILIB_DIRNAMES = 64 32
+MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o \
+ crtbeginT.o crtprec32.o crtprec64.o crtprec80.o \
+ crtfastmath.o
diff --git a/gcc-4.4.0/gcc/config/i386/t-mingw32 b/gcc-4.4.0/gcc/config/i386/t-mingw32
new file mode 100644
index 000000000..bfe15069c
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-mingw32
@@ -0,0 +1,5 @@
+# Match SYSTEM_INCLUDE_DIR
+NATIVE_SYSTEM_HEADER_DIR = /mingw/include
+
+# MinGW-specific parts of LIB_SPEC
+SHLIB_LC = -lmingw32 -lmingwex -lmoldname -lmsvcrt -luser32 -lkernel32 -ladvapi32 -lshell32
diff --git a/gcc-4.4.0/gcc/config/i386/t-netware b/gcc-4.4.0/gcc/config/i386/t-netware
new file mode 100644
index 000000000..2d3a828a5
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-netware
@@ -0,0 +1,9 @@
+TARGET_LIBGCC2_CFLAGS = -mpreferred-stack-boundary=2 -fomit-frame-pointer
+
+netware.o: $(srcdir)/config/i386/netware.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_P_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/netware.c
+
+# We don't need some of GCC's own include files.
+USER_H = $(srcdir)/ginclude/stdarg.h \
+ $(srcdir)/ginclude/varargs.h \
+ $(EXTRA_HEADERS) $(LANG_EXTRA_HEADERS)
diff --git a/gcc-4.4.0/gcc/config/i386/t-nto b/gcc-4.4.0/gcc/config/i386/t-nto
new file mode 100644
index 000000000..b80ff8029
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-nto
@@ -0,0 +1,4 @@
+CRTSTUFF_T_CFLAGS = -fno-omit-frame-pointer -fPIC
+TARGET_LIBGCC2_CFLAGS = -fPIC -fexceptions
+
+EXTRA_PARTS = crtbegin.o
diff --git a/gcc-4.4.0/gcc/config/i386/t-nwld b/gcc-4.4.0/gcc/config/i386/t-nwld
new file mode 100644
index 000000000..db5b905b7
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-nwld
@@ -0,0 +1,30 @@
+CRTSTUFF_T_CFLAGS = -mpreferred-stack-boundary=2
+CRT0STUFF_T_CFLAGS = -mpreferred-stack-boundary=2 $(INCLUDES)
+# This is a slight misuse (it's not an assembler file).
+CRT0_S = $(srcdir)/config/i386/netware-crt0.c
+MCRT0_S = $(srcdir)/config/i386/netware-crt0.c
+
+$(T)libgcc.def: $(srcdir)/config/i386/t-nwld
+ echo "module libgcc_s" >$@
+
+$(T)libc.def: $(srcdir)/config/i386/t-nwld
+ echo "module libc" >$@
+
+$(T)libcpre.def: $(srcdir)/config/i386/t-nwld
+ echo "start _LibCPrelude" >$@
+ echo "exit _LibCPostlude" >>$@
+ echo "check _LibCCheckUnload" >>$@
+
+$(T)posixpre.def: $(srcdir)/config/i386/t-nwld
+ echo "start POSIX_Start" >$@
+ echo "exit POSIX_Stop" >>$@
+ echo "check POSIX_CheckUnload" >>$@
+
+nwld.o: $(srcdir)/config/i386/nwld.c $(RTL_H) $(TREE_H) $(CONFIG_H) $(TM_P_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $(srcdir)/config/i386/nwld.c
+
+
+s-crt0: $(srcdir)/unwind-dw2-fde.h
+
+# To keep DRIVER_DEFINES correct.
+SHLIB_LINK = dummy
diff --git a/gcc-4.4.0/gcc/config/i386/t-openbsd b/gcc-4.4.0/gcc/config/i386/t-openbsd
new file mode 100644
index 000000000..183046340
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-openbsd
@@ -0,0 +1,6 @@
+# gdb gets confused if PIC code is linked with non-PIC code.
+# We cope by building variants of libgcc.
+MULTILIB_OPTIONS = fpic
+MULTILIB_MATCHES=fpic=fPIC
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc-4.4.0/gcc/config/i386/t-pmm_malloc b/gcc-4.4.0/gcc/config/i386/t-pmm_malloc
new file mode 100644
index 000000000..109009fbf
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-pmm_malloc
@@ -0,0 +1,6 @@
+# Install pmm_malloc.h as mm_malloc.h.
+
+EXTRA_HEADERS += mm_malloc.h
+mm_malloc.h: $(srcdir)/config/i386/pmm_malloc.h
+ rm -f $@
+ cat $^ > $@
diff --git a/gcc-4.4.0/gcc/config/i386/t-rtems-i386 b/gcc-4.4.0/gcc/config/i386/t-rtems-i386
new file mode 100644
index 000000000..6486e09f2
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-rtems-i386
@@ -0,0 +1,51 @@
+#
+# This file was based on t-sol2, the x86 Solaris implementation; the
+# source code used to create crti.o and crtn.o is exactly the same as
+# the Solaris version.  Later, we might want to have an RTEMS-specific
+# version of these files.
+#
+
+$(T)crti.o: $(srcdir)/config/i386/sol2-ci.asm $(GCC_PASSES)
+ sed -e '/^!/d' <$(srcdir)/config/i386/sol2-ci.asm >crti.s
+ $(GCC_FOR_TARGET) -c -o $(T)crti.o crti.s
+$(T)crtn.o: $(srcdir)/config/i386/sol2-cn.asm $(GCC_PASSES)
+ sed -e '/^!/d' <$(srcdir)/config/i386/sol2-cn.asm >crtn.s
+ $(GCC_FOR_TARGET) -c -o $(T)crtn.o crtn.s
+
+# We want fine grained libraries, so use the new code to build the
+# floating point emulation libraries.
+FPBIT = fp-bit.c
+DPBIT = dp-bit.c
+
+LIB2FUNCS_EXTRA = xp-bit.c
+
+dp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' > dp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>dp-bit.c
+ echo '#endif' >> dp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> dp-bit.c
+
+fp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define FLOAT' > fp-bit.c
+ echo '#ifdef __LITTLE_ENDIAN__' >> fp-bit.c
+ echo '#define FLOAT_BIT_ORDER_MISMATCH' >>fp-bit.c
+ echo '#endif' >> fp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> fp-bit.c
+
+xp-bit.c: $(srcdir)/config/fp-bit.c
+ echo '#define EXTENDED_FLOAT_STUBS' > xp-bit.c
+ cat $(srcdir)/config/fp-bit.c >> xp-bit.c
+
+MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro \
+msoft-float
+MULTILIB_DIRNAMES= m486 mpentium mpentiumpro soft-float
+MULTILIB_MATCHES = msoft-float=mno-m80387
+MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?athlon
+MULTILIB_EXCEPTIONS = \
+mtune=pentium/*msoft-float* \
+mtune=pentiumpro/*msoft-float*
+
+EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
diff --git a/gcc-4.4.0/gcc/config/i386/t-sjlj-eh b/gcc-4.4.0/gcc/config/i386/t-sjlj-eh
new file mode 100644
index 000000000..c9085f432
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-sjlj-eh
@@ -0,0 +1,3 @@
+
+# We are using SjLj EH.
+EH_MODEL = sjlj
diff --git a/gcc-4.4.0/gcc/config/i386/t-sol2-10 b/gcc-4.4.0/gcc/config/i386/t-sol2-10
new file mode 100644
index 000000000..64e5928ca
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-sol2-10
@@ -0,0 +1,11 @@
+MULTILIB_OPTIONS = m32/m64
+MULTILIB_DIRNAMES = 32 amd64
+MULTILIB_OSDIRNAMES = . amd64
+
+LIBGCC = stmp-multilib
+INSTALL_LIBGCC = install-multilib
+
+# GCC contains i386 assembler sources for some of the startfiles
+# which aren't appropriate for amd64. Just use the installed
+# versions of: crt1.o crti.o crtn.o gcrt1.o
+EXTRA_MULTILIB_PARTS=gmon.o crtbegin.o crtend.o
diff --git a/gcc-4.4.0/gcc/config/i386/t-svr3dbx b/gcc-4.4.0/gcc/config/i386/t-svr3dbx
new file mode 100644
index 000000000..517113791
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-svr3dbx
@@ -0,0 +1,7 @@
+# gas 1.38.1 supporting dbx-in-coff requires a link script.
+
+svr3.ifile: $(srcdir)/config/i386/svr3.ifile
+ rm -f svr3.ifile; cp $(srcdir)/config/i386/svr3.ifile .
+
+svr3z.ifile: $(srcdir)/config/i386/svr3z.ifile
+ rm -f svr3z.ifile; cp $(srcdir)/config/i386/svr3z.ifile .
diff --git a/gcc-4.4.0/gcc/config/i386/t-vxworks b/gcc-4.4.0/gcc/config/i386/t-vxworks
new file mode 100644
index 000000000..c440b1f90
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-vxworks
@@ -0,0 +1,8 @@
+# Multilibs for VxWorks.
+
+# Build multilibs for normal, -mrtp, and -mrtp -fPIC.
+MULTILIB_OPTIONS = mrtp fPIC
+MULTILIB_DIRNAMES =
+MULTILIB_MATCHES = fPIC=fpic
+MULTILIB_EXCEPTIONS = fPIC
+
diff --git a/gcc-4.4.0/gcc/config/i386/t-vxworksae b/gcc-4.4.0/gcc/config/i386/t-vxworksae
new file mode 100644
index 000000000..0cea2bbf3
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/t-vxworksae
@@ -0,0 +1,5 @@
+# Multilibs for VxWorks AE.
+
+MULTILIB_OPTIONS = mvthreads
+MULTILIB_MATCHES =
+MULTILIB_EXCEPTIONS =
diff --git a/gcc-4.4.0/gcc/config/i386/tmmintrin.h b/gcc-4.4.0/gcc/config/i386/tmmintrin.h
new file mode 100644
index 000000000..9835669ca
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/tmmintrin.h
@@ -0,0 +1,244 @@
+/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.1. */
+
+#ifndef _TMMINTRIN_H_INCLUDED
+#define _TMMINTRIN_H_INCLUDED
+
+#ifndef __SSSE3__
+# error "SSSE3 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
+#include <pmmintrin.h>
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadds_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadd_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hadds_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
+}
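+
+/* Illustrative note (not part of the original header): the horizontal
+   adds pair adjacent elements, first across __X, then across __Y.  For
+   example, with a = {1,2,3,4,5,6,7,8} (low element first),
+   _mm_hadd_epi16 (a, a) yields {3,7,11,15,3,7,11,15}.  */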
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsub_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
+}
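+
+/* Illustrative note (not part of the original header): pshufb picks
+   each result byte from __X using the low four bits of the matching
+   byte of __Y, and zeroes it when bit 7 of the selector byte is set.
+   A full byte reversal of an illustrative vector v is, e.g.:
+
+     __m128i rev = _mm_shuffle_epi8 (v,
+       _mm_set_epi8 (0, 1, 2, 3, 4, 5, 6, 7,
+                     8, 9, 10, 11, 12, 13, 14, 15));  */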
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi8 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi16 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_epi32 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi8 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi16 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sign_pi32 (__m64 __X, __m64 __Y)
+{
+ return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_epi8(__m128i __X, __m128i __Y, const int __N)
+{
+ return (__m128i) __builtin_ia32_palignr128 ((__v2di)__X,
+ (__v2di)__Y, __N * 8);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N)
+{
+ return (__m64) __builtin_ia32_palignr ((__v1di)__X,
+ (__v1di)__Y, __N * 8);
+}
+#else
+#define _mm_alignr_epi8(X, Y, N) \
+ ((__m128i) __builtin_ia32_palignr128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), \
+ (int)(N) * 8))
+#define _mm_alignr_pi8(X, Y, N) \
+ ((__m64) __builtin_ia32_palignr ((__v1di)(__m64)(X), \
+ (__v1di)(__m64)(Y), \
+ (int)(N) * 8))
+#endif
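+
+/* Illustrative note (not part of the original header): palignr treats
+   __X:__Y as one 256-bit value and extracts 16 bytes starting __N bytes
+   into __Y, which lets a byte-sliding window cross two aligned loads,
+   e.g. with illustrative variables hi/lo:
+
+     __m128i window = _mm_alignr_epi8 (hi, lo, 4);  */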
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi8 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi16 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_epi32 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi8 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi16 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_abs_pi32 (__m64 __X)
+{
+ return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
+}
+
+#endif /* __SSSE3__ */
+
+#endif /* _TMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/unix.h b/gcc-4.4.0/gcc/config/i386/unix.h
new file mode 100644
index 000000000..ae5d31dfb
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/unix.h
@@ -0,0 +1,76 @@
+/* Definitions for Unix assembler syntax for the Intel 80386.
+ Copyright (C) 1988, 1994, 1999, 2000, 2001, 2002, 2007, 2009
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This file defines the aspects of assembler syntax
+ that are the same for all the i386 Unix systems
+ (though they may differ in non-Unix systems). */
+
+/* Define macro used to output shift-double opcodes when the shift
+ count is in %cl. Some assemblers require %cl as an argument;
+ some don't. This macro controls what to do: by default, don't
+ print %cl. */
+#define SHIFT_DOUBLE_OMITS_COUNT 1
+
+/* Define the syntax of pseudo-ops, labels and comments. */
+
+/* String containing the assembler's comment-starter. */
+
+#define ASM_COMMENT_START "/"
+
+/* Output to assembler file text saying following lines
+ may contain character constants, extra white space, comments, etc. */
+
+#define ASM_APP_ON "/APP\n"
+
+/* Output to assembler file text saying following lines
+ no longer contain unusual constructs. */
+
+#define ASM_APP_OFF "/NO_APP\n"
+
+/* Output before read-only data. */
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+
+/* Output before writable (initialized) data. */
+
+#define DATA_SECTION_ASM_OP "\t.data"
+
+/* Output before writable (uninitialized) data. */
+
+#define BSS_SECTION_ASM_OP "\t.bss"
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP ".globl "
+
+/* By default, the target has an 80387, uses IEEE-compatible arithmetic,
+   and returns float values in the 387.  */
+#undef TARGET_SUBTARGET_DEFAULT
+#define TARGET_SUBTARGET_DEFAULT \
+ (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS)
+
+/* By default, 64-bit mode uses 128-bit long double. */
+#undef TARGET_SUBTARGET64_DEFAULT
+#define TARGET_SUBTARGET64_DEFAULT \
+ MASK_128BIT_LONG_DOUBLE
diff --git a/gcc-4.4.0/gcc/config/i386/vx-common.h b/gcc-4.4.0/gcc/config/i386/vx-common.h
new file mode 100644
index 000000000..b4bea845f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/vx-common.h
@@ -0,0 +1,26 @@
+/* IA32 VxWorks and VxWorks AE target definitions.
+ Copyright (C) 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+/* VxWorks uses the same ABI as Solaris 10. */
+
+#define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+ ix86_sol10_return_in_memory (TYPE, FNTYPE)
diff --git a/gcc-4.4.0/gcc/config/i386/vxworks.h b/gcc-4.4.0/gcc/config/i386/vxworks.h
new file mode 100644
index 000000000..fe5630004
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/vxworks.h
@@ -0,0 +1,78 @@
+/* IA32 VxWorks target definitions for GNU compiler.
+ Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+ Updated by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define HANDLE_SYSV_PRAGMA 1
+
+#undef TARGET_VERSION
+#define TARGET_VERSION fprintf (stderr, " (80586, VxWorks syntax)");
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{v:-v} %{Qy:} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*}"
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ if (TARGET_386) \
+ builtin_define ("CPU=I80386"); \
+ else if (TARGET_486) \
+ builtin_define ("CPU=I80486"); \
+ else if (TARGET_PENTIUM) \
+ { \
+ builtin_define ("CPU=PENTIUM"); \
+ builtin_define ("CPU_VARIANT=PENTIUM"); \
+ } \
+ else if (TARGET_PENTIUMPRO) \
+ { \
+ builtin_define ("CPU=PENTIUM2"); \
+ builtin_define ("CPU_VARIANT=PENTIUMPRO"); \
+ } \
+ else if (TARGET_PENTIUM4) \
+ { \
+ builtin_define ("CPU=PENTIUM4"); \
+ builtin_define ("CPU_VARIANT=PENTIUM4"); \
+ } \
+ } \
+ while (0)
+
+#undef CPP_SPEC
+#define CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC
+
+#undef SUBTARGET_SWITCHES
+#define SUBTARGET_SWITCHES EXTRA_SUBTARGET_SWITCHES
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS VXWORKS_OVERRIDE_OPTIONS
+
+/* No _mcount profiling on VxWorks. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO)
+
+/* We cannot use PC-relative accesses for VxWorks PIC because there is no
+ fixed gap between segments. */
+#undef ASM_PREFERRED_EH_DATA_FORMAT
diff --git a/gcc-4.4.0/gcc/config/i386/vxworksae.h b/gcc-4.4.0/gcc/config/i386/vxworksae.h
new file mode 100644
index 000000000..29fa8af20
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/vxworksae.h
@@ -0,0 +1,26 @@
+/* IA32 VxWorks AE target definitions for GNU compiler.
+ Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* On VxWorks AE, we only want SIMNT. */
+#undef VXWORKS_CPU_DEFINE
+#define VXWORKS_CPU_DEFINE() \
+ do \
+ builtin_define ("CPU=SIMNT"); \
+ while (0)
diff --git a/gcc-4.4.0/gcc/config/i386/w32-unwind.h b/gcc-4.4.0/gcc/config/i386/w32-unwind.h
new file mode 100755
index 000000000..ce8be2369
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/w32-unwind.h
@@ -0,0 +1,212 @@
+/* Definitions for Dwarf2 EH unwind support for Windows32 targets
+ Copyright (C) 2007, 2009
+ Free Software Foundation, Inc.
+ Contributed by Pascal Obry <obry@adacore.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This file implements the md_fallback_frame_state_for routine for
+ Windows, triggered when the GCC table based unwinding process hits a
+ frame for which no unwind info has been registered. This typically
+ occurs when raising an exception from a signal handler, because the
+ handler is actually called from the OS kernel.
+
+ The basic idea is to detect that we are indeed trying to unwind past a
+ signal handler and to fill out the GCC internal unwinding structures for
+ the OS kernel frame as if it had been directly called from the
+ interrupted context.
+
+ This is all assuming that the code to set the handler asked the kernel
+ to pass a pointer to such context information.
+
+ There are three main parts.
+
+ 1) The first thing to do is to check whether we are in a signal
+ context. If not, we can just return, as there is nothing to do; we
+ are probably in foreign code for which no unwind frame can be found.
+ If this is a call from the Windows signal handler, then:
+
+ 2) We must get the signal context information.
+
+ * With the standard exception filter:
+
+ On Windows this is pointed to by an EXCEPTION_POINTERS structure. We
+ know that the signal handler will call UnhandledExceptionFilter with
+ this parameter. The spec for this routine is:
+
+ LONG WINAPI UnhandledExceptionFilter(struct _EXCEPTION_POINTERS*);
+
+ So the pointer to struct _EXCEPTION_POINTERS must be somewhere on the
+ stack.
+
+ This was found experimentally to always be at offset 0 of the context
+ frame in all cases handled by this implementation.
+
+ * With the SEH exception handler:
+
+ In this case the signal context is directly on the stack as the SEH
+ exception handler has the following prototype:
+
+ DWORD
+ SEH_error_handler (PEXCEPTION_RECORD ExceptionRecord,
+ PVOID EstablisherFrame,
+ PCONTEXT ContextRecord,
+ PVOID DispatcherContext)
+
+ This was found experimentally to always be at offset 56 of the
+ context frame in all cases handled by this implementation.
+
+ 3) When we have the signal context we just have to save some registers
+ and set the return address based on the program counter (Eip).
+
+ Note that this implementation follows closely the same principles as the
+ GNU/Linux and OSF ones. */
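+
+/* Illustrative sketch (not part of this file): the generic DWARF-2
+ unwinder in unwind-dw2.c consumes the macro defined below roughly as
+
+ if (no FDE is found for context->ra)
+ return MD_FALLBACK_FRAME_STATE_FOR (context, fs);
+
+ so the routine here only runs once table-based lookup has failed. */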
+
+#define WIN32_MEAN_AND_LEAN
+#include <windows.h>
+/* Patterns found experimentally in a Windows signal handler. */
+
+/* In a standard exception filter */
+
+#define SIG_PAT1 \
+ (pc_[-2] == 0xff && pc_[-1] == 0xd0 /* call %eax */ \
+ && pc_[0] == 0x83 && pc_[1] == 0xf8) /* cmp 0xdepl,%eax */
+
+#define SIG_PAT2 \
+ (pc_[-5] == 0xe8 && pc_[-4] == 0x68 /* call (depl16) */ \
+ && pc_[0] == 0xc3) /* ret */
+
+/* In a Win32 SEH handler */
+
+#define SIG_SEH1 \
+ (pc_[-5] == 0xe8 /* call addr */ \
+ && pc_[0] == 0x83 && pc_[1] == 0xc4 /* add 0xval,%esp */ \
+ && pc_[3] == 0xb8) /* mov 0xval,%eax */
+
+#define SIG_SEH2 \
+ (pc_[-5] == 0x8b && pc_[-4] == 0x4d /* mov depl(%ebp),%ecx */ \
+ && pc_[0] == 0x64 && pc_[1] == 0x8b) /* mov %fs:(0),<reg> */ \
+
+/* In the GCC alloca (stack probing) */
+
+#define SIG_ALLOCA \
+ (pc_[-1] == 0x83 /* orl $0x0,(%ecx) */ \
+ && pc_[0] == 0x9 && pc_[1] == 0 \
+ && pc_[2] == 0x2d && pc_[3] == 0 /* subl $0x1000,%eax */ \
+ && pc_[4] == 0x10 && pc_[5] == 0)
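+
+/* To illustrate how the bytes above decode (a sketch; pc_ is the
+ faulting address inside the probe loop):
+
+ orl $0x0,(%ecx) 83 09 00 <- pc_ points at the 09
+ subl $0x1000,%eax 2d 00 10 00 00
+
+ i.e. pc_[-1] is the orl opcode and pc_[2..5] cover the subl opcode
+ and the start of its immediate. */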
+
+
+#define MD_FALLBACK_FRAME_STATE_FOR i386_w32_fallback_frame_state
+
+static _Unwind_Reason_Code
+i386_w32_fallback_frame_state (struct _Unwind_Context *context,
+ _Unwind_FrameState *fs)
+{
+ void * ctx_ra_ = (void *)(context->ra); /* return address */
+ void * ctx_cfa_ = (void *)(context->cfa); /* context frame address */
+ unsigned char * pc_ = (unsigned char *) ctx_ra_;
+
+ /* In the test below we look for specific byte patterns found
+ experimentally to be in the Windows signal handler. */
+
+ if (SIG_PAT1 || SIG_PAT2 || SIG_SEH1 || SIG_SEH2)
+ {
+ PEXCEPTION_POINTERS weinfo_;
+ PCONTEXT proc_ctx_;
+ long new_cfa_;
+
+ if (SIG_SEH1)
+ proc_ctx_ = (PCONTEXT) (*(int*)(ctx_cfa_ + 56));
+ else if (SIG_SEH2)
+ proc_ctx_ = (PCONTEXT) (*(int*)(ctx_cfa_ + 8));
+ else
+ {
+ weinfo_ = (PEXCEPTION_POINTERS) (*(int*)ctx_cfa_);
+ proc_ctx_ = weinfo_->ContextRecord;
+ }
+
+ /* The new context frame address is the stack pointer. */
+
+ new_cfa_ = proc_ctx_->Esp;
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column();
+ fs->regs.cfa_offset = new_cfa_ - (long) ctx_cfa_;
+
+ /* Save some registers. */
+
+ fs->regs.reg[0].how = REG_SAVED_OFFSET;
+ fs->regs.reg[0].loc.offset = (long)&proc_ctx_->Eax - new_cfa_;
+ fs->regs.reg[3].how = REG_SAVED_OFFSET;
+ fs->regs.reg[3].loc.offset = (long)&proc_ctx_->Ebx - new_cfa_;
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = (long)&proc_ctx_->Ecx - new_cfa_;
+ fs->regs.reg[2].how = REG_SAVED_OFFSET;
+ fs->regs.reg[2].loc.offset = (long)&proc_ctx_->Edx - new_cfa_;
+ fs->regs.reg[6].how = REG_SAVED_OFFSET;
+ fs->regs.reg[6].loc.offset = (long)&proc_ctx_->Esi - new_cfa_;
+ fs->regs.reg[7].how = REG_SAVED_OFFSET;
+ fs->regs.reg[7].loc.offset = (long)&proc_ctx_->Edi - new_cfa_;
+ fs->regs.reg[9].how = REG_SAVED_OFFSET;
+ fs->regs.reg[9].loc.offset = (long)&proc_ctx_->Eip - new_cfa_;
+ fs->regs.reg[4].how = REG_SAVED_OFFSET;
+ fs->regs.reg[4].loc.offset = (long)&proc_ctx_->Ebp - new_cfa_;
+
+ /* Set the return address to Eip + 1. As we can be called multiple
+ times, we use another register for this. */
+
+ proc_ctx_->Dr0 = proc_ctx_->Eip + 1;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = (long)&proc_ctx_->Dr0 - new_cfa_;
+ fs->retaddr_column = 8;
+ return _URC_NO_REASON;
+ }
+
+ /* Unwinding through _alloca, propagating from a trap triggered by
+ one of its probes prior to the real SP adjustment. The only
+ operation of interest performed there is "pushl %ecx", followed
+ by the clobbering of ecx. */
+
+ else if (SIG_ALLOCA)
+ {
+ /* Only one push between entry in _alloca and the probe trap. */
+ long new_cfa_ = (long) ctx_cfa_ + 4;
+
+ fs->regs.cfa_how = CFA_REG_OFFSET;
+ fs->regs.cfa_reg = __builtin_dwarf_sp_column();
+ fs->regs.cfa_offset = new_cfa_ - (long) ctx_cfa_;
+
+ /* The saved value of %ecx is at CFA - 4 */
+ fs->regs.reg[1].how = REG_SAVED_OFFSET;
+ fs->regs.reg[1].loc.offset = -4;
+
+ /* and what is stored at the CFA is the return address. */
+ fs->retaddr_column = 8;
+ fs->regs.reg[8].how = REG_SAVED_OFFSET;
+ fs->regs.reg[8].loc.offset = 0;
+
+ return _URC_NO_REASON;
+ }
+ else
+ return _URC_END_OF_STACK;
+}
diff --git a/gcc-4.4.0/gcc/config/i386/winnt-cxx.c b/gcc-4.4.0/gcc/config/i386/winnt-cxx.c
new file mode 100644
index 000000000..9df7cf645
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/winnt-cxx.c
@@ -0,0 +1,143 @@
+/* Target support for C++ classes on Windows.
+ Contributed by Danny Smith (dannysmith@users.sourceforge.net)
+ Copyright (C) 2005, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "cp/cp-tree.h" /* this is why we're a separate module */
+#include "flags.h"
+#include "tm_p.h"
+#include "toplev.h"
+#include "hashtab.h"
+
+bool
+i386_pe_type_dllimport_p (tree decl)
+{
+ gcc_assert (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == FUNCTION_DECL);
+
+ if (TARGET_NOP_FUN_DLLIMPORT && TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+ /* We ignore the dllimport attribute for inline member functions.
+ This differs from MSVC behavior, which treats it like the GNU C
+ 'extern inline' extension. Also ignore it for template
+ instantiations with linkonce semantics and for artificial methods. */
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && (DECL_DECLARED_INLINE_P (decl)
+ || DECL_TEMPLATE_INSTANTIATION (decl)
+ || DECL_ARTIFICIAL (decl)))
+ return false;
+
+ /* Don't mark defined functions as dllimport. This code will only be
+ reached if we see a non-inline function defined out-of-class. */
+ else if (TREE_CODE (decl) == FUNCTION_DECL
+ && (DECL_INITIAL (decl)))
+ return false;
+
+ /* Don't allow definitions of static data members in a dllimport'd
+ class. If vtable data is marked as DECL_EXTERNAL, import it;
+ otherwise just ignore the class attribute. */
+ else if (TREE_CODE (decl) == VAR_DECL
+ && TREE_STATIC (decl) && TREE_PUBLIC (decl)
+ && !DECL_EXTERNAL (decl))
+ {
+ if (!DECL_VIRTUAL_P (decl))
+ error ("definition of static data member %q+D of "
+ "dllimport'd class", decl);
+ return false;
+ }
+
+ return true;
+}
+
+
+bool
+i386_pe_type_dllexport_p (tree decl)
+{
+ gcc_assert (TREE_CODE (decl) == VAR_DECL
+ || TREE_CODE (decl) == FUNCTION_DECL);
+ /* Avoid exporting compiler-generated default dtors and copy ctors.
+ The only artificial methods that need to be exported are virtual
+ and non-virtual thunks. */
+ if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE
+ && DECL_ARTIFICIAL (decl) && !DECL_THUNK_P (decl))
+ return false;
+ return true;
+}
+
+static inline void maybe_add_dllimport (tree decl)
+{
+ if (i386_pe_type_dllimport_p (decl))
+ DECL_DLLIMPORT_P (decl) = 1;
+}
+
+void
+i386_pe_adjust_class_at_definition (tree t)
+{
+ tree member;
+
+ gcc_assert (CLASS_TYPE_P (t));
+
+ /* We only look at dllimport. The only thing that dllexport does is
+ add stuff to a '.drectve' section at end-of-file, so there is no need
+ to do anything for dllexport'd classes until we generate RTL. */
+ if (lookup_attribute ("dllimport", TYPE_ATTRIBUTES (t)) == NULL_TREE)
+ return;
+
+ /* We don't actually add the attribute to the decl, just set the flag
+ that signals that the address of this symbol is not a compile-time
+ constant. Any subsequent out-of-class declaration of members will
+ cause the DECL_DLLIMPORT_P flag to be unset
+ (see tree.c: merge_dllimport_decl_attributes).
+ That is just right, since an out-of-class declaration can only be a
+ definition. We recheck the class members at RTL generation and emit
+ warnings if this has happened. A definition of a static data member
+ of a dllimport'd class always causes an error (as per the MS
+ compiler). */
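+
+ /* For example (illustrative user code, not compiled here):
+
+ class __attribute__ ((dllimport)) S {
+ static int count; // an out-of-class definition of this is an error
+ void f (); // marked DECL_DLLIMPORT_P by the loops below
+ };
+ */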
+
+ /* Check static VAR_DECL's. */
+ for (member = TYPE_FIELDS (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllimport (member);
+
+ /* Check FUNCTION_DECL's. */
+ for (member = TYPE_METHODS (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == FUNCTION_DECL)
+ maybe_add_dllimport (member);
+
+ /* Check vtables. */
+ for (member = CLASSTYPE_VTABLES (t); member; member = TREE_CHAIN (member))
+ if (TREE_CODE (member) == VAR_DECL)
+ maybe_add_dllimport (member);
+
+/* We leave typeinfo tables alone. We can't mark TI objects as
+ dllimport, since the address of a secondary VTT may be needed
+ for static initialization of a primary VTT. VTT's of
+ dllimport'd classes should always be link-once COMDAT. */
+}
diff --git a/gcc-4.4.0/gcc/config/i386/winnt-stubs.c b/gcc-4.4.0/gcc/config/i386/winnt-stubs.c
new file mode 100644
index 000000000..a9c7cd739
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/winnt-stubs.c
@@ -0,0 +1,52 @@
+/* Dummy subroutines for language-specific support on Windows.
+ Contributed by Danny Smith (dannysmith@users.sourceforge.net)
+ Copyright (C) 2005, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "toplev.h"
+#include "hashtab.h"
+
+bool
+i386_pe_type_dllimport_p (tree decl ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+
+bool
+i386_pe_type_dllexport_p (tree decl ATTRIBUTE_UNUSED)
+{
+ return false;
+}
+
+
+void
+i386_pe_adjust_class_at_definition (tree t ATTRIBUTE_UNUSED)
+{ }
diff --git a/gcc-4.4.0/gcc/config/i386/winnt.c b/gcc-4.4.0/gcc/config/i386/winnt.c
new file mode 100644
index 000000000..ae9196c59
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/winnt.c
@@ -0,0 +1,644 @@
+/* Subroutines for insn-output.c for Windows NT.
+ Contributed by Douglas Rupp (drupp@cs.washington.edu)
+ Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+ 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "rtl.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "tree.h"
+#include "flags.h"
+#include "tm_p.h"
+#include "toplev.h"
+#include "hashtab.h"
+#include "langhooks.h"
+#include "ggc.h"
+#include "target.h"
+
+/* i386/PE specific attribute support.
+
+ i386/PE has two new attributes:
+ dllexport - for exporting a function/variable that will live in a dll
+ dllimport - for importing a function/variable from a dll
+
+ Microsoft allows multiple declspecs in one __declspec, separating
+ them with spaces. We do NOT support this. Instead, use __declspec
+ multiple times.
+*/
+
+/* Handle a "shared" attribute;
+ arguments as in struct attribute_spec.handler. */
+tree
+ix86_handle_shared_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != VAR_DECL)
+ {
+ warning (OPT_Wattributes, "%qs attribute only applies to variables",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "selectany" attribute;
+ arguments as in struct attribute_spec.handler. */
+tree
+ix86_handle_selectany_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ /* The attribute applies only to objects that are initialized and have
+ external linkage. However, we may not know about initialization
+ until the language frontend has processed the decl. We'll check for
+ initialization later in encode_section_info. */
+ if (TREE_CODE (*node) != VAR_DECL || !TREE_PUBLIC (*node))
+ {
+ error ("%qs attribute applies only to initialized variables"
+ " with external linkage", IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+
+/* Return the type that we should use to determine if DECL is
+ imported or exported. */
+
+static tree
+associated_type (tree decl)
+{
+ return (DECL_CONTEXT (decl) && TYPE_P (DECL_CONTEXT (decl))
+ ? DECL_CONTEXT (decl) : NULL_TREE);
+}
+
+/* Return true if DECL should be a dllexport'd object. */
+
+static bool
+i386_pe_determine_dllexport_p (tree decl)
+{
+ tree assoc;
+
+ if (TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ if (lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)))
+ return true;
+
+ /* Also mark class members of exported classes with dllexport. */
+ assoc = associated_type (decl);
+ if (assoc && lookup_attribute ("dllexport", TYPE_ATTRIBUTES (assoc)))
+ return i386_pe_type_dllexport_p (decl);
+
+ return false;
+}
+
+/* Return true if DECL should be a dllimport'd object. */
+
+static bool
+i386_pe_determine_dllimport_p (tree decl)
+{
+ tree assoc;
+
+ if (TREE_CODE (decl) != VAR_DECL && TREE_CODE (decl) != FUNCTION_DECL)
+ return false;
+
+ /* Lookup the attribute in addition to checking the DECL_DLLIMPORT_P flag.
+ We may need to override an earlier decision. */
+ if (DECL_DLLIMPORT_P (decl))
+ return true;
+
+ /* The DECL_DLLIMPORT_P flag was set for decls in the class definition
+ by targetm.cxx.adjust_class_at_definition. Check again to emit
+ warnings if the class attribute has been overridden by an
+ out-of-class definition. */
+ assoc = associated_type (decl);
+ if (assoc && lookup_attribute ("dllimport", TYPE_ATTRIBUTES (assoc)))
+ return i386_pe_type_dllimport_p (decl);
+
+ return false;
+}
+
+/* Handle the -mno-fun-dllimport target switch. */
+
+bool
+i386_pe_valid_dllimport_attribute_p (const_tree decl)
+{
+ if (TARGET_NOP_FUN_DLLIMPORT && TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+ return true;
+}
+
+/* Return a string which is the function name identified by ID, modified
+ with a suffix consisting of an at-sign (@) followed by the number of
+ bytes of arguments. If ID is NULL, use DECL_NAME as the base. If
+ FASTCALL is true, also prepend the FASTCALL_PREFIX.
+ Return NULL_TREE if no change is required. */
+
+static tree
+gen_stdcall_or_fastcall_suffix (tree decl, tree id, bool fastcall)
+{
+ HOST_WIDE_INT total = 0;
+ const char *old_str = IDENTIFIER_POINTER (id != NULL_TREE ? id : DECL_NAME (decl));
+ char *new_str, *p;
+ tree type = TREE_TYPE (decl);
+ tree arg;
+ function_args_iterator args_iter;
+
+ gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
+
+ if (prototype_p (type))
+ {
+ /* This attribute is ignored for variadic functions. */
+ if (stdarg_p (type))
+ return NULL_TREE;
+
+ /* Quit if we hit an incomplete type. Error is reported
+ by convert_arguments in c-typeck.c or cp/typeck.c. */
+ FOREACH_FUNCTION_ARGS(type, arg, args_iter)
+ {
+ HOST_WIDE_INT parm_size;
+ HOST_WIDE_INT parm_boundary_bytes = PARM_BOUNDARY / BITS_PER_UNIT;
+
+ if (! COMPLETE_TYPE_P (arg))
+ break;
+
+ parm_size = int_size_in_bytes (arg);
+ if (parm_size < 0)
+ break;
+
+ /* Must round up to include padding. This is done the same
+ way as in store_one_arg. */
+ parm_size = ((parm_size + parm_boundary_bytes - 1)
+ / parm_boundary_bytes * parm_boundary_bytes);
+ total += parm_size;
+ }
+ }
+ /* Assume max of 8 base 10 digits in the suffix. */
+ p = new_str = XALLOCAVEC (char, 1 + strlen (old_str) + 1 + 8 + 1);
+ if (fastcall)
+ *p++ = FASTCALL_PREFIX;
+ sprintf (p, "%s@" HOST_WIDE_INT_PRINT_DEC, old_str, total);
+
+ return get_identifier (new_str);
+}
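+
+/* For example (illustrative; shown before any target user-label prefix
+ is applied, and assuming 4-byte ints with a 32-bit PARM_BOUNDARY):
+
+ int __attribute__ ((stdcall)) f (int, int); -> "f@8"
+ int __attribute__ ((fastcall)) g (int, int); -> "@g@8"
+
+ since FASTCALL_PREFIX is '@' on this target. */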
+
+/* Maybe decorate and get a new identifier for the DECL of a stdcall or
+ fastcall function. The original identifier is supplied in ID. */
+
+static tree
+i386_pe_maybe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = NULL_TREE;
+
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ tree type_attributes = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("stdcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_suffix (decl, id, false);
+ else if (lookup_attribute ("fastcall", type_attributes))
+ new_id = gen_stdcall_or_fastcall_suffix (decl, id, true);
+ }
+
+ return new_id;
+}
+
+/* This is used as a target hook to modify the DECL_ASSEMBLER_NAME
+ in the language-independent default hook
+ langhooks.c:lhd_set_decl_assembler_name ()
+ and in cp/mangle.c:mangle_decl (). */
+tree
+i386_pe_mangle_decl_assembler_name (tree decl, tree id)
+{
+ tree new_id = i386_pe_maybe_mangle_decl_assembler_name (decl, id);
+
+ return (new_id ? new_id : id);
+}
+
+void
+i386_pe_encode_section_info (tree decl, rtx rtl, int first)
+{
+ rtx symbol;
+ int flags;
+
+ /* Do this last, due to our frobbing of DECL_DLLIMPORT_P above. */
+ default_encode_section_info (decl, rtl, first);
+
+ /* Careful not to prod global register variables. */
+ if (!MEM_P (rtl))
+ return;
+
+ symbol = XEXP (rtl, 0);
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+
+ switch (TREE_CODE (decl))
+ {
+ case FUNCTION_DECL:
+ /* FIXME: Imported stdcall names are not modified by the Ada frontend.
+ Check and decorate the RTL name now. */
+ if (strcmp (lang_hooks.name, "GNU Ada") == 0)
+ {
+ tree new_id;
+ tree old_id = DECL_ASSEMBLER_NAME (decl);
+ const char* asm_str = IDENTIFIER_POINTER (old_id);
+ /* Do not change the identifier if a verbatim asmspec
+ or if stdcall suffix already added. */
+ if (!(*asm_str == '*' || strchr (asm_str, '@'))
+ && (new_id = i386_pe_maybe_mangle_decl_assembler_name (decl,
+ old_id)))
+ XSTR (symbol, 0) = IDENTIFIER_POINTER (new_id);
+ }
+ break;
+
+ case VAR_DECL:
+ if (lookup_attribute ("selectany", DECL_ATTRIBUTES (decl)))
+ {
+ if (DECL_INITIAL (decl)
+ /* If an object is initialized with a ctor, the static
+ initialization and destruction code for it is present in
+ each unit defining the object. The code that calls the
+ ctor is protected by a link-once guard variable, so that
+ the object still has link-once semantics. */
+ || TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (decl)))
+ make_decl_one_only (decl);
+ else
+ error ("%q+D:'selectany' attribute applies only to "
+ "initialized objects", decl);
+ }
+ break;
+
+ default:
+ return;
+ }
+
+ /* Mark the decl so we can tell from the rtl whether the object is
+ dllexport'd or dllimport'd. tree.c: merge_dllimport_decl_attributes
+ handles dllexport/dllimport override semantics. */
+ flags = (SYMBOL_REF_FLAGS (symbol) &
+ ~(SYMBOL_FLAG_DLLIMPORT | SYMBOL_FLAG_DLLEXPORT));
+ if (i386_pe_determine_dllexport_p (decl))
+ flags |= SYMBOL_FLAG_DLLEXPORT;
+ else if (i386_pe_determine_dllimport_p (decl))
+ {
+ flags |= SYMBOL_FLAG_DLLIMPORT;
+ /* If we went through the associated_type path, this won't already
+ be set. Though, frankly, this seems wrong, and should be fixed
+ elsewhere. */
+ if (!DECL_DLLIMPORT_P (decl))
+ {
+ DECL_DLLIMPORT_P (decl) = 1;
+ flags &= ~SYMBOL_FLAG_LOCAL;
+ }
+ }
+ SYMBOL_REF_FLAGS (symbol) = flags;
+}
+
+bool
+i386_pe_binds_local_p (const_tree exp)
+{
+ /* PE does not do dynamic binding. Indeed, the only kind of
+ non-local reference comes from a dllimport'd symbol. */
+ if ((TREE_CODE (exp) == VAR_DECL || TREE_CODE (exp) == FUNCTION_DECL)
+ && DECL_DLLIMPORT_P (exp))
+ return false;
+
+ return true;
+}
+
+/* Also strip the fastcall prefix and stdcall suffix. */
+
+const char *
+i386_pe_strip_name_encoding_full (const char *str)
+{
+ const char *p;
+ const char *name = default_strip_name_encoding (str);
+
+ /* Strip leading '@' on fastcall symbols. */
+ if (*name == '@')
+ name++;
+
+ /* Strip trailing "@n". */
+ p = strchr (name, '@');
+ if (p)
+ return ggc_alloc_string (name, p - name);
+
+ return name;
+}
+
+void
+i386_pe_unique_section (tree decl, int reloc)
+{
+ int len;
+ const char *name, *prefix;
+ char *string;
+
+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
+ name = i386_pe_strip_name_encoding_full (name);
+
+ /* The object is put in, for example, section .text$foo.
+ The linker will then ultimately place them in .text
+ (everything from the $ on is stripped). Don't put
+ read-only data in .rdata section to avoid a PE linker
+ bug when .rdata$* grouped sections are used in code
+ without a .rdata section. */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ prefix = ".text$";
+ else if (decl_readonly_section (decl, reloc))
+ prefix = ".rdata$";
+ else
+ prefix = ".data$";
+ len = strlen (name) + strlen (prefix);
+ string = XALLOCAVEC (char, len + 1);
+ sprintf (string, "%s%s", prefix, name);
+
+ DECL_SECTION_NAME (decl) = build_string (len, string);
+}
+
+/* Select a set of attributes for section NAME based on the properties
+ of DECL and whether or not RELOC indicates that DECL's initializer
+ might contain runtime relocations.
+
+ We make the section read-only and executable for a function decl,
+ read-only for a const data decl, and writable for a non-const data decl.
+
+ If the section has already been defined, do not allow it to have
+ different attributes, as (1) this is ambiguous since we're not seeing
+ all the declarations up front and (2) some assemblers (e.g. SVR4)
+ do not recognize section redefinitions. */
+/* ??? This differs from the "standard" PE implementation in that we
+ handle the SHARED variable attribute. Should this be done for all
+ PE targets? */
+
+#define SECTION_PE_SHARED SECTION_MACH_DEP
+
+unsigned int
+i386_pe_section_type_flags (tree decl, const char *name, int reloc)
+{
+ static htab_t htab;
+ unsigned int flags;
+ unsigned int **slot;
+
+ /* The names we put in the hashtable will always be the unique
+ versions given to us by the stringtable, so we can just use
+ their addresses as the keys. */
+ if (!htab)
+ htab = htab_create (31, htab_hash_pointer, htab_eq_pointer, NULL);
+
+ if (decl && TREE_CODE (decl) == FUNCTION_DECL)
+ flags = SECTION_CODE;
+ else if (decl && decl_readonly_section (decl, reloc))
+ flags = 0;
+ else if (current_function_decl
+ && cfun
+ && crtl->subsections.unlikely_text_section_name
+ && strcmp (name, crtl->subsections.unlikely_text_section_name) == 0)
+ flags = SECTION_CODE;
+ else if (!decl
+ && (!current_function_decl || !cfun)
+ && strcmp (name, UNLIKELY_EXECUTED_TEXT_SECTION_NAME) == 0)
+ flags = SECTION_CODE;
+ else
+ {
+ flags = SECTION_WRITE;
+
+ if (decl && TREE_CODE (decl) == VAR_DECL
+ && lookup_attribute ("shared", DECL_ATTRIBUTES (decl)))
+ flags |= SECTION_PE_SHARED;
+ }
+
+ if (decl && DECL_ONE_ONLY (decl))
+ flags |= SECTION_LINKONCE;
+
+ /* See if we already have an entry for this section. */
+ slot = (unsigned int **) htab_find_slot (htab, name, INSERT);
+ if (!*slot)
+ {
+ *slot = (unsigned int *) xmalloc (sizeof (unsigned int));
+ **slot = flags;
+ }
+ else
+ {
+ if (decl && **slot != flags)
+ error ("%q+D causes a section type conflict", decl);
+ }
+
+ return flags;
+}
+
+void
+i386_pe_asm_named_section (const char *name, unsigned int flags,
+ tree decl)
+{
+ char flagchars[8], *f = flagchars;
+
+ if ((flags & (SECTION_CODE | SECTION_WRITE)) == 0)
+ /* readonly data */
+ {
+ *f++ = 'd'; /* This is necessary for older versions of gas. */
+ *f++ = 'r';
+ }
+ else
+ {
+ if (flags & SECTION_CODE)
+ *f++ = 'x';
+ if (flags & SECTION_WRITE)
+ *f++ = 'w';
+ if (flags & SECTION_PE_SHARED)
+ *f++ = 's';
+ }
+
+ *f = '\0';
+
+ fprintf (asm_out_file, "\t.section\t%s,\"%s\"\n", name, flagchars);
+
+ if (flags & SECTION_LINKONCE)
+ {
+ /* Functions may have been compiled at various levels of
+ optimization so we can't use `same_size' here.
+ Instead, have the linker pick one, without warning.
+ If 'selectany' attribute has been specified, MS compiler
+ sets 'discard' characteristic, rather than telling linker
+ to warn of size or content mismatch, so do the same. */
+ bool discard = (flags & SECTION_CODE)
+ || lookup_attribute ("selectany",
+ DECL_ATTRIBUTES (decl));
+ fprintf (asm_out_file, "\t.linkonce %s\n",
+ (discard ? "discard" : "same_size"));
+ }
+}
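+
+/* For instance, a link-once function foo placed in its unique section
+ would typically be announced as (illustrative output):
+
+ .section .text$foo,"x"
+ .linkonce discard
+ */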
+
+void
+i386_pe_asm_output_aligned_decl_common (FILE *stream, tree decl,
+ const char *name, HOST_WIDE_INT size,
+ HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT rounded;
+
+ /* Compute as in assemble_noswitch_variable, since we don't actually
+ support aligned common. */
+ rounded = size ? size : 1;
+ rounded += (BIGGEST_ALIGNMENT / BITS_PER_UNIT) - 1;
+ rounded = (rounded / (BIGGEST_ALIGNMENT / BITS_PER_UNIT)
+ * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
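+
+ /* E.g. with a BIGGEST_ALIGNMENT of 128 bits (16 bytes; illustrative,
+ the value is target-dependent), a 5-byte object is rounded up to 16
+ and emitted as ".comm name, 16". */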
+
+ i386_pe_maybe_record_exported_symbol (decl, name, 1);
+
+ fprintf (stream, "\t.comm\t");
+ assemble_name (stream, name);
+ fprintf (stream, ", " HOST_WIDE_INT_PRINT_DEC "\t" ASM_COMMENT_START
+ " " HOST_WIDE_INT_PRINT_DEC "\n",
+ rounded, size);
+}
+
+/* The Microsoft linker requires that every function be marked as
+ DT_FCN. When using gas on cygwin, we must emit appropriate .type
+ directives. */
+
+#include "gsyms.h"
+
+/* Mark a function appropriately. This should only be called for
+ functions for which we are not emitting COFF debugging information.
+ FILE is the assembler output file, NAME is the name of the
+ function, and PUB is nonzero if the function is globally
+ visible. */
+
+void
+i386_pe_declare_function_type (FILE *file, const char *name, int pub)
+{
+ fprintf (file, "\t.def\t");
+ assemble_name (file, name);
+ fprintf (file, ";\t.scl\t%d;\t.type\t%d;\t.endef\n",
+ pub ? (int) C_EXT : (int) C_STAT,
+ (int) DT_FCN << N_BTSHFT);
+}
+
+/* Keep a list of external functions. */
+
+struct extern_list GTY(())
+{
+ struct extern_list *next;
+ tree decl;
+ const char *name;
+};
+
+static GTY(()) struct extern_list *extern_head;
+
+/* Assemble an external function reference. We need to keep a list of
+ these, so that we can output the function types at the end of the
+ assembly. We can't output the types now, because we might see a
+ definition of the function later on and emit debugging information
+ for it then. */
+
+void
+i386_pe_record_external_function (tree decl, const char *name)
+{
+ struct extern_list *p;
+
+ p = (struct extern_list *) ggc_alloc (sizeof *p);
+ p->next = extern_head;
+ p->decl = decl;
+ p->name = name;
+ extern_head = p;
+}
+
+/* Keep a list of exported symbols. */
+
+struct export_list GTY(())
+{
+ struct export_list *next;
+ const char *name;
+ int is_data; /* used to type tag exported symbols. */
+};
+
+static GTY(()) struct export_list *export_head;
+
+/* Assemble an export symbol entry. We need to keep a list of
+ these, so that we can output the export list at the end of the
+ assembly. We used to output these export symbols in each function,
+ but that causes problems with GNU ld when the sections are
+ linkonce. */
+
+void
+i386_pe_maybe_record_exported_symbol (tree decl, const char *name, int is_data)
+{
+ rtx symbol;
+ struct export_list *p;
+
+ symbol = XEXP (DECL_RTL (decl), 0);
+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ if (!SYMBOL_REF_DLLEXPORT_P (symbol))
+ return;
+
+ p = (struct export_list *) ggc_alloc (sizeof *p);
+ p->next = export_head;
+ p->name = name;
+ p->is_data = is_data;
+ export_head = p;
+}
+
+/* This is called at the end of assembly. For each external function
+ which has not been defined, we output a declaration now. We also
+ output the .drectve section. */
+
+void
+i386_pe_file_end (void)
+{
+ struct extern_list *p;
+
+ ix86_file_end ();
+
+ for (p = extern_head; p != NULL; p = p->next)
+ {
+ tree decl;
+
+ decl = p->decl;
+
+ /* Positively ensure only one declaration for any given symbol. */
+ if (! TREE_ASM_WRITTEN (decl)
+ && TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
+ {
+ TREE_ASM_WRITTEN (decl) = 1;
+ i386_pe_declare_function_type (asm_out_file, p->name,
+ TREE_PUBLIC (decl));
+ }
+ }
+
+ if (export_head)
+ {
+ struct export_list *q;
+ drectve_section ();
+ for (q = export_head; q != NULL; q = q->next)
+ {
+ fprintf (asm_out_file, "\t.ascii \" -export:%s%s\"\n",
+ default_strip_name_encoding (q->name),
+ (q->is_data ? ",data" : ""));
+ }
+ }
+}
+
+#include "gt-winnt.h"
diff --git a/gcc-4.4.0/gcc/config/i386/wmmintrin.h b/gcc-4.4.0/gcc/config/i386/wmmintrin.h
new file mode 100644
index 000000000..2c4bdc99a
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/wmmintrin.h
@@ -0,0 +1,120 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 10.1. */
+
+#ifndef _WMMINTRIN_H_INCLUDED
+#define _WMMINTRIN_H_INCLUDED
+
+/* We need definitions from the SSE2 header file. */
+#include <emmintrin.h>
+
+#if !defined (__AES__) && !defined (__PCLMUL__)
+# error "AES/PCLMUL instructions not enabled"
+#else
+
+/* AES */
+
+#ifdef __AES__
+/* Performs 1 round of AES decryption of the first m128i using
+ the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesdec_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the last round of AES decryption of the first m128i
+ using the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
+ (__v2di)__Y);
+}
+
+/* Performs 1 round of AES encryption of the first m128i using
+ the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesenc_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the last round of AES encryption of the first m128i
+ using the second m128i as a round key. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
+{
+ return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
+}
+
+/* Performs the InverseMixColumn operation on the source m128i
+ and stores the result into m128i destination. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aesimc_si128 (__m128i __X)
+{
+ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
+}
+
+/* Generates a m128i round key for the input m128i AES cipher key and
+ byte round constant. The second parameter must be a compile time
+ constant. */
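+
+/* Illustrative key-expansion step (a sketch only; the round constants
+ and the shuffles that follow come from FIPS-197, not from this
+ header):
+
+ __m128i __tmp = _mm_aeskeygenassist_si128 (__key, 0x01);
+
+ feeding __tmp back into the schedule yields the next round key. */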
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
+{
+ return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
+}
+#else
+#define _mm_aeskeygenassist_si128(X, C) \
+ ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
+ (int)(C)))
+#endif
+#endif /* __AES__ */
+
+/* PCLMUL */
+
+#ifdef __PCLMUL__
+/* Performs carry-less integer multiplication of 64-bit halves of
+ 128-bit input operands. The third parameter indicates which 64-bit
+ halves of the input parameters __X and __Y should be used. It must
+ be a compile-time constant. */
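+
+/* In the immediate, bit 0 selects the half of the first operand and
+ bit 4 the half of the second; e.g. a selector of 0x00 multiplies the
+ two low halves and 0x11 the two high halves. */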
+#ifdef __OPTIMIZE__
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
+{
+ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
+ (__v2di)__Y, __I);
+}
+#else
+#define _mm_clmulepi64_si128(X, Y, I) \
+ ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
+ (__v2di)(__m128i)(Y), (int)(I)))
+#endif
+#endif /* __PCLMUL__ */
+
+#endif /* __AES__/__PCLMUL__ */
+
+#endif /* _WMMINTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/x-cygwin b/gcc-4.4.0/gcc/config/i386/x-cygwin
new file mode 100644
index 000000000..cfd7758cf
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/x-cygwin
@@ -0,0 +1,4 @@
+host-cygwin.o : $(srcdir)/config/i386/host-cygwin.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) toplev.h diagnostic.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/host-cygwin.c
diff --git a/gcc-4.4.0/gcc/config/i386/x-darwin b/gcc-4.4.0/gcc/config/i386/x-darwin
new file mode 100644
index 000000000..9a3b0f262
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/x-darwin
@@ -0,0 +1,4 @@
+host-i386-darwin.o : $(srcdir)/config/i386/host-i386-darwin.c \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h hosthooks.h $(HOSTHOOKS_DEF_H) \
+ config/host-darwin.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc-4.4.0/gcc/config/i386/x-i386 b/gcc-4.4.0/gcc/config/i386/x-i386
new file mode 100644
index 000000000..9f03de453
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/x-i386
@@ -0,0 +1,4 @@
+driver-i386.o : $(srcdir)/config/i386/driver-i386.c \
+ $(srcdir)/config/i386/cpuid.h \
+ $(CONFIG_H) $(SYSTEM_H) $(TM_H) coretypes.h
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
diff --git a/gcc-4.4.0/gcc/config/i386/x-mingw32 b/gcc-4.4.0/gcc/config/i386/x-mingw32
new file mode 100644
index 000000000..0af4f5c3f
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/x-mingw32
@@ -0,0 +1,13 @@
+#
+# Make local_includedir relative to EXEC_PREFIX
+#
+local_includedir=$(libsubdir)/$(unlibsubdir)/..`echo $(exec_prefix) | sed -e 's|^$(prefix)||' -e 's|/[^/]*|/..|g'`/include
+
+# On MinGW, we use "%I64d" to print 64-bit integers, and the format-checking
+# code does not handle that, so we have to disable checking here.
+WERROR_FLAGS += -Wno-format
+
+host-mingw32.o : $(srcdir)/config/i386/host-mingw32.c $(CONFIG_H) $(SYSTEM_H) \
+ coretypes.h hosthooks.h hosthooks-def.h toplev.h $(DIAGNOSTIC_H) $(HOOKS_H)
+ $(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/i386/host-mingw32.c
diff --git a/gcc-4.4.0/gcc/config/i386/x86-64.h b/gcc-4.4.0/gcc/config/i386/x86-64.h
new file mode 100644
index 000000000..46dcad156
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/x86-64.h
@@ -0,0 +1,98 @@
+/* OS independent definitions for AMD x86-64.
+ Copyright (C) 2001, 2005, 2007, 2009 Free Software Foundation, Inc.
+ Contributed by Bo Thorsen <bo@suse.de>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#undef ASM_COMMENT_START
+#define ASM_COMMENT_START "#"
+
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(n) \
+ (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])
+
+/* Output assembler code to FILE to call the profiler. */
+#define NO_PROFILE_COUNTERS 1
+
+#undef MCOUNT_NAME
+#define MCOUNT_NAME "mcount"
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
+
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1_cpu) %{profile:-p}"
+
+#undef ASM_SPEC
+#define ASM_SPEC "%{v:-V} %{Qy:} %{!Qn:-Qy} %{n} %{T} %{Ym,*} %{Yd,*} \
+ %{Wa,*:%*} %{m32:--32} %{m64:--64}"
+
+#undef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ x86_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+
+#undef ASM_OUTPUT_ALIGNED_COMMON
+#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
+ x86_elf_aligned_common (FILE, NAME, SIZE, ALIGN);
+
+/* This is used to align code labels according to Intel recommendations. */
+
+#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP) \
+ do { \
+ if ((LOG) != 0) { \
+ if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG)); \
+ else { \
+ fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP)); \
+ /* Make sure that we have at least 8 byte alignment if > 8 byte \
+ alignment is preferred. */ \
+ if ((LOG) > 3 && (1 << (LOG)) > ((MAX_SKIP) + 1)) \
+ fprintf ((FILE), "\t.p2align 3\n"); \
+ } \
+ } \
+ } while (0)
+#endif
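+
+/* For example (illustrative), LOG == 4 and MAX_SKIP == 7 emit
+
+ .p2align 4,,7
+ .p2align 3
+
+ guaranteeing at least 8-byte alignment when the full 16-byte
+ alignment could not be reached within the 7-byte skip budget. */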
+
+
+/* i386 System V Release 4 uses DWARF debugging info.
+ x86-64 ABI specifies DWARF2. */
+
+#define DWARF2_DEBUGGING_INFO 1
+#define DWARF2_UNWIND_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION x86_64_elf_select_section
+
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION x86_64_elf_unique_section
diff --git a/gcc-4.4.0/gcc/config/i386/x86intrin.h b/gcc-4.4.0/gcc/config/i386/x86intrin.h
new file mode 100644
index 000000000..d848811d3
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/x86intrin.h
@@ -0,0 +1,70 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _X86INTRIN_H_INCLUDED
+#define _X86INTRIN_H_INCLUDED
+
+#ifdef __MMX__
+#include <mmintrin.h>
+#endif
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#ifdef __SSE3__
+#include <pmmintrin.h>
+#endif
+
+#ifdef __SSSE3__
+#include <tmmintrin.h>
+#endif
+
+#ifdef __SSE4a__
+#include <ammintrin.h>
+#endif
+
+#if defined (__SSE4_2__) || defined (__SSE4_1__)
+#include <smmintrin.h>
+#endif
+
+#ifdef __SSE5__
+#include <bmmintrin.h>
+#endif
+
+#if defined (__AES__) || defined (__PCLMUL__)
+#include <wmmintrin.h>
+#endif
+
+/* For including AVX instructions */
+#include <immintrin.h>
+
+#ifdef __3dNOW__
+#include <mm3dnow.h>
+#endif
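+
+/* Typical use (a sketch): compile with the desired ISA flags, e.g.
+ "gcc -msse4.2 -maes -c f.c", and include only <x86intrin.h>; the
+ guards above then pull in exactly the headers those flags enable. */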
+
+#endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc-4.4.0/gcc/config/i386/xm-cygwin.h b/gcc-4.4.0/gcc/config/i386/xm-cygwin.h
new file mode 100644
index 000000000..bd2238729
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/xm-cygwin.h
@@ -0,0 +1,22 @@
+/* Configuration for GCC for hosting on Windows NT,
+ using a Unix-style C library.
+ Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
diff --git a/gcc-4.4.0/gcc/config/i386/xm-djgpp.h b/gcc-4.4.0/gcc/config/i386/xm-djgpp.h
new file mode 100644
index 000000000..c3758ea9e
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/xm-djgpp.h
@@ -0,0 +1,84 @@
+/* Configuration for GCC for Intel 80386 running DJGPP.
+ Copyright (C) 1988, 1996, 1998, 1999, 2000, 2001, 2004, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use semicolons to separate elements of a path. */
+#define PATH_SEPARATOR ';'
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
+
+/* System dependent initialization for collect2
+ to tell system() to act like Unix. */
+#define COLLECT2_HOST_INITIALIZATION \
+ do { __system_flags |= (__system_allow_multiple_cmds \
+ | __system_emulate_chdir); } while (0)
+
+/* Define a version appropriate for DOS. */
+#undef XREF_FILE_NAME
+#define XREF_FILE_NAME(xref_file, file) \
+ do { \
+ const char xref_ext[] = ".gxref"; \
+ strcpy (xref_file, file); \
+ s = basename (xref_file); \
+ t = strchr (s, '.'); \
+ if (t) \
+ strcpy (t, xref_ext); \
+ else \
+ strcat (xref_file, xref_ext); \
+ } while (0)
+
+#undef GCC_DRIVER_HOST_INITIALIZATION
+#define GCC_DRIVER_HOST_INITIALIZATION \
+ do { \
+ /* If the environment variable DJDIR is not defined, then DJGPP is not \
+ installed correctly and GCC will quickly become confused with the \
+ default prefix settings. Report the problem now so the user doesn't \
+ receive deceptive "file not found" error messages later. */ \
+ char *djdir = getenv ("DJDIR"); \
+ if (djdir == NULL) \
+ { \
+ /* DJDIR is automatically defined by the DJGPP environment config \
+ file pointed to by the environment variable DJGPP. Examine DJGPP \
+ to try and figure out what's wrong. */ \
+ char *djgpp = getenv ("DJGPP"); \
+ if (djgpp == NULL) \
+ fatal ("environment variable DJGPP not defined"); \
+ else if (access (djgpp, R_OK) == 0) \
+ fatal ("environment variable DJGPP points to missing file '%s'", \
+ djgpp); \
+ else \
+ fatal ("environment variable DJGPP points to corrupt file '%s'", \
+ djgpp); \
+ } \
+ } while (0)
+
+/* Canonicalize paths containing '/dev/env/'; used in prefix.c.
+ _fixpath is a djgpp-specific function to canonicalize a path.
+ "/dev/env/DJDIR" evaluates to "c:/djgpp" if DJDIR is "c:/djgpp" for
+ example. It removes any trailing '/', so add it back. */
+/* We cannot free PATH below as it can point to a string constant. */
+#define UPDATE_PATH_HOST_CANONICALIZE(PATH) \
+ if (memcmp ((PATH), "/dev/env/", sizeof("/dev/env/") - 1) == 0) \
+ { \
+ static char fixed_path[FILENAME_MAX + 1]; \
+ \
+ _fixpath ((PATH), fixed_path); \
+ strcat (fixed_path, "/"); \
+ (PATH) = xstrdup (fixed_path); \
+ }
diff --git a/gcc-4.4.0/gcc/config/i386/xm-mingw32.h b/gcc-4.4.0/gcc/config/i386/xm-mingw32.h
new file mode 100644
index 000000000..e0dd3f372
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/xm-mingw32.h
@@ -0,0 +1,35 @@
+/* Configuration for GCC for hosting on Windows32,
+ using GNU tools and the Windows32 API Library.
+ Copyright (C) 1997, 1998, 1999, 2001, 2002, 2003, 2004, 2007
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define HOST_EXECUTABLE_SUFFIX ".exe"
+
+#undef PATH_SEPARATOR
+#define PATH_SEPARATOR ';'
+
+/* This is the name of the null device on Windows. */
+#define HOST_BIT_BUCKET "nul"
+
+/* The st_ino field of struct stat is always 0. */
+#define HOST_LACKS_INODE_NUMBERS
+
+/* MSVCRT does not support the "ll" format specifier for printing
+ "long long" values. Instead, we use "I64". */
+#define HOST_LONG_LONG_FORMAT "I64"
diff --git a/gcc-4.4.0/gcc/config/i386/xmmintrin.h b/gcc-4.4.0/gcc/config/i386/xmmintrin.h
new file mode 100644
index 000000000..544577eca
--- /dev/null
+++ b/gcc-4.4.0/gcc/config/i386/xmmintrin.h
@@ -0,0 +1,1251 @@
+/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+ User Guide and Reference, version 9.0. */
+
+#ifndef _XMMINTRIN_H_INCLUDED
+#define _XMMINTRIN_H_INCLUDED
+
+#ifndef __SSE__
+# error "SSE instruction set not enabled"
+#else
+
+/* We need type definitions from the MMX header file. */
+#include <mmintrin.h>
+
+/* Get _mm_malloc () and _mm_free (). */
+#include <mm_malloc.h>
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* Internal data types for implementing the intrinsics. */
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+/* Create a selector for use with the SHUFPS instruction. */
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
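+
+/* For example, _MM_SHUFFLE (3, 2, 1, 0) is 0xE4, the identity
+ selector: result element i is taken from source element i. */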
+
+/* Constants for use with _mm_prefetch. */
+enum _mm_hint
+{
+ _MM_HINT_T0 = 3,
+ _MM_HINT_T1 = 2,
+ _MM_HINT_T2 = 1,
+ _MM_HINT_NTA = 0
+};
+
+/* Bits in the MXCSR. */
+#define _MM_EXCEPT_MASK 0x003f
+#define _MM_EXCEPT_INVALID 0x0001
+#define _MM_EXCEPT_DENORM 0x0002
+#define _MM_EXCEPT_DIV_ZERO 0x0004
+#define _MM_EXCEPT_OVERFLOW 0x0008
+#define _MM_EXCEPT_UNDERFLOW 0x0010
+#define _MM_EXCEPT_INEXACT 0x0020
+
+#define _MM_MASK_MASK 0x1f80
+#define _MM_MASK_INVALID 0x0080
+#define _MM_MASK_DENORM 0x0100
+#define _MM_MASK_DIV_ZERO 0x0200
+#define _MM_MASK_OVERFLOW 0x0400
+#define _MM_MASK_UNDERFLOW 0x0800
+#define _MM_MASK_INEXACT 0x1000
+
+#define _MM_ROUND_MASK 0x6000
+#define _MM_ROUND_NEAREST 0x0000
+#define _MM_ROUND_DOWN 0x2000
+#define _MM_ROUND_UP 0x4000
+#define _MM_ROUND_TOWARD_ZERO 0x6000
+
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#define _MM_FLUSH_ZERO_ON 0x8000
+#define _MM_FLUSH_ZERO_OFF 0x0000
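+
+/* For example (a sketch; _mm_getcsr and _mm_setcsr are defined further
+ down in this header), forcing round-toward-zero while preserving the
+ other MXCSR bits:
+
+ _mm_setcsr ((_mm_getcsr () & ~_MM_ROUND_MASK) | _MM_ROUND_TOWARD_ZERO);
+ */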
+
+/* Create a vector of zeros. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setzero_ps (void)
+{
+ return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
+/* Perform the respective operation on the lower SPFP (single-precision
+ floating-point) values of A and B; the upper three SPFP values are
+ passed through from A. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcpss ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ss (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform the respective operation on the four SPFP values in A and B. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_add_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sub_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mul_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_div_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sqrt_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rcpps ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rsqrt_ps (__m128 __A)
+{
+ return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform logical bit-wise operations on 128-bit values. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_andps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_andnps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_orps (__A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_ps (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_xorps (__A, __B);
+}
+
+/* Perform a comparison on the lower SPFP values of A and B. If the
+ comparison is true, place a mask of all ones in the result, otherwise a
+ mask of zeros. The upper three SPFP values are passed through from A. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpltss ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpless ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpnltss ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf) __A,
+ (__v4sf)
+ __builtin_ia32_cmpnless ((__v4sf) __B,
+ (__v4sf)
+ __A));
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Perform a comparison on the four SPFP values of A and B. For each
+ element, if the comparison is true, place a mask of all ones in the
+ result, otherwise a mask of zeros. */
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpeq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmplt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmple_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpgt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpge_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpneq_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnlt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnle_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpngt_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpnge_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpord_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmpunord_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
+}
+
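+/* Editorial example (not part of the original header): the all-ones /
+   all-zeros lane masks produced above combine with the bit-wise
+   intrinsics for branch-free selection.  The helper name
+   __example_select_min_ps is hypothetical; it returns the smaller value
+   in each lane, the same job minps does in one instruction, but the
+   and/andnot/or pattern works for any predicate.  */
+static __inline __m128
+__example_select_min_ps (__m128 __a, __m128 __b)
+{
+  __m128 __mask = _mm_cmplt_ps (__a, __b);	/* ~0 where a < b, else 0 */
+  return _mm_or_ps (_mm_and_ps (__mask, __a),
+		    _mm_andnot_ps (__mask, __b));
+}
+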
+/* Compare the lower SPFP values of A and B and return 1 if true
+ and 0 if false. */
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comieq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comilt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comile_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comigt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comige_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_comineq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomieq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomilt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomile_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomigt_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomige_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ucomineq_ss (__m128 __A, __m128 __B)
+{
+ return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B);
+}
+
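+/* Editorial note (not part of the original header): the comi forms
+   above map to COMISS, which raises the invalid-operation exception for
+   quiet NaN operands, while the ucomi forms map to UCOMISS, which
+   raises it only for signalling NaNs.  Both compare just the lowest
+   lanes and return an int, so they slot directly into scalar control
+   flow; the helper name __example_lower_lt is hypothetical.  */
+static __inline int
+__example_lower_lt (__m128 __x, __m128 __y)
+{
+  return _mm_ucomilt_ss (__x, __y);
+}
+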
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+ rounding mode. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si32 (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ss2si (__m128 __A)
+{
+ return _mm_cvtss_si32 (__A);
+}
+
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 64-bit integer according to the
+   current rounding mode. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64 (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
+/* Convert the two lower SPFP values to 32-bit integers according to the
+ current rounding mode. Return the integers in packed form. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi32 (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_ps2pi (__m128 __A)
+{
+ return _mm_cvtps_pi32 (__A);
+}
+
+/* Truncate the lower SPFP value to a 32-bit integer. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si32 (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si ((__v4sf) __A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ss2si (__m128 __A)
+{
+ return _mm_cvttss_si32 (__A);
+}
+
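+/* Editorial note (not part of the original header): the cvt forms honor
+   the current MXCSR rounding mode (round-to-nearest-even by default),
+   while the cvtt forms always truncate toward zero.  With the default
+   mode, a lowest lane of 1.5f gives
+
+     _mm_cvtss_si32  -> 2	(nearest even)
+     _mm_cvttss_si32 -> 1	(truncation)  */
+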
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 64-bit integer. */
+
+/* Intel intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64 (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+
+/* Microsoft intrinsic. */
+extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttss_si64x (__m128 __A)
+{
+ return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
+/* Truncate the two lower SPFP values to 32-bit integers. Return the
+ integers in packed form. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttps_pi32 (__m128 __A)
+{
+ return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_ps2pi (__m128 __A)
+{
+ return _mm_cvttps_pi32 (__A);
+}
+
+/* Convert B to a SPFP value and insert it as element zero in A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi32_ss (__m128 __A, int __B)
+{
+ return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_si2ss (__m128 __A, int __B)
+{
+ return _mm_cvtsi32_ss (__A, __B);
+}
+
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A. */
+
+/* Intel intrinsic. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+
+/* Microsoft intrinsic. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+ return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
+/* Convert the two 32-bit values in B to SPFP form and insert them
+ as the two lower elements in A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32_ps (__m128 __A, __m64 __B)
+{
+ return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_pi2ps (__m128 __A, __m64 __B)
+{
+ return _mm_cvtpi32_ps (__A, __B);
+}
+
+/* Convert the four signed 16-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi16_ps (__m64 __A)
+{
+ __v4hi __sign;
+ __v2si __hisi, __losi;
+ __v4sf __zero, __ra, __rb;
+
+ /* This comparison against zero gives us a mask that can be used to
+ fill in the missing sign bits in the unpack operations below, so
+ that we get signed values after unpacking. */
+ __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
+
+ /* Convert the four words to doublewords. */
+ __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
+
+ /* Convert the doublewords to floating point two at a time. */
+ __zero = (__v4sf) _mm_setzero_ps ();
+ __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
+ __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+
+ return (__m128) __builtin_ia32_movlhps (__ra, __rb);
+}
+
+/* Convert the four unsigned 16-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu16_ps (__m64 __A)
+{
+ __v2si __hisi, __losi;
+ __v4sf __zero, __ra, __rb;
+
+ /* Convert the four words to doublewords. */
+ __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
+ __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
+
+ /* Convert the doublewords to floating point two at a time. */
+ __zero = (__v4sf) _mm_setzero_ps ();
+ __ra = __builtin_ia32_cvtpi2ps (__zero, __hisi);
+ __rb = __builtin_ia32_cvtpi2ps (__ra, __losi);
+
+ return (__m128) __builtin_ia32_movlhps (__ra, __rb);
+}
+
+/* Convert the low four signed 8-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi8_ps (__m64 __A)
+{
+ __v8qi __sign;
+
+ /* This comparison against zero gives us a mask that can be used to
+ fill in the missing sign bits in the unpack operations below, so
+ that we get signed values after unpacking. */
+ __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
+
+ /* Convert the four low bytes to words. */
+ __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
+
+ return _mm_cvtpi16_ps(__A);
+}
+
+/* Convert the low four unsigned 8-bit values in A to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpu8_ps(__m64 __A)
+{
+ __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
+ return _mm_cvtpu16_ps(__A);
+}
+
+/* Convert the four signed 32-bit values in A and B to SPFP form. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
+{
+ __v4sf __zero = (__v4sf) _mm_setzero_ps ();
+ __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
+ __v4sf __sfb = __builtin_ia32_cvtpi2ps (__sfa, (__v2si)__B);
+ return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
+}
+
+/* Convert the four SPFP values in A to four signed 16-bit integers. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi16(__m128 __A)
+{
+ __v4sf __hisf = (__v4sf)__A;
+ __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
+ __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
+ __v2si __losi = __builtin_ia32_cvtps2pi (__losf);
+ return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
+}
+
+/* Convert the four SPFP values in A to four signed 8-bit integers. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_pi8(__m128 __A)
+{
+ __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
+ return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
+}
+
+/* Selects four specific SPFP values from A and B based on MASK. */
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask)
+{
+ return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask);
+}
+#else
+#define _mm_shuffle_ps(A, B, MASK) \
+ ((__m128) __builtin_ia32_shufps ((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), (int)(MASK)))
+#endif
+
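+/* Editorial example (not part of the original header): _MM_SHUFFLE,
+   defined earlier in this header, packs four 2-bit source indices,
+   listed from the highest result element down to the lowest.  The
+   helper name __example_splat0_ps is hypothetical; selecting element 0
+   for every position broadcasts it across the vector.  */
+static __inline __m128
+__example_splat0_ps (__m128 __a)
+{
+  return _mm_shuffle_ps (__a, __a, _MM_SHUFFLE (0, 0, 0, 0));
+}
+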
+/* Selects and interleaves the upper two SPFP values from A and B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpackhi_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Selects and interleaves the lower two SPFP values from A and B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_unpacklo_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Sets the upper two SPFP values with 64 bits of data loaded from P;
+ the lower two values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadh_pi (__m128 __A, __m64 const *__P)
+{
+ return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
+}
+
+/* Stores the upper two SPFP values of A into P. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeh_pi (__m64 *__P, __m128 __A)
+{
+ __builtin_ia32_storehps ((__v2sf *)__P, (__v4sf)__A);
+}
+
+/* Moves the upper two values of B into the lower two values of A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movehl_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Moves the lower two values of B into the upper two values of A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movelh_ps (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Sets the lower two SPFP values with 64 bits of data loaded from P;
+ the upper two values are passed through from A. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadl_pi (__m128 __A, __m64 const *__P)
+{
+ return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
+}
+
+/* Stores the lower two SPFP values of A into P. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storel_pi (__m64 *__P, __m128 __A)
+{
+ __builtin_ia32_storelps ((__v2sf *)__P, (__v4sf)__A);
+}
+
+/* Creates a 4-bit mask from the most significant bits of the SPFP values. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_ps (__m128 __A)
+{
+ return __builtin_ia32_movmskps ((__v4sf)__A);
+}
+
+/* Return the contents of the control register. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_getcsr (void)
+{
+ return __builtin_ia32_stmxcsr ();
+}
+
+/* Read exception bits from the control register. */
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_STATE (void)
+{
+ return _mm_getcsr() & _MM_EXCEPT_MASK;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_EXCEPTION_MASK (void)
+{
+ return _mm_getcsr() & _MM_MASK_MASK;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_ROUNDING_MODE (void)
+{
+ return _mm_getcsr() & _MM_ROUND_MASK;
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_GET_FLUSH_ZERO_MODE (void)
+{
+ return _mm_getcsr() & _MM_FLUSH_ZERO_MASK;
+}
+
+/* Set the control register to I. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setcsr (unsigned int __I)
+{
+ __builtin_ia32_ldmxcsr (__I);
+}
+
+/* Set exception bits in the control register. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_STATE(unsigned int __mask)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_EXCEPTION_MASK (unsigned int __mask)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_ROUNDING_MODE (unsigned int __mode)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
+{
+ _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
+}
+
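+/* Editorial example (not part of the original header): the accessor
+   pairs above are read-modify-write helpers for MXCSR.  The helper name
+   __example_enable_ftz is hypothetical; it turns on flush-to-zero and
+   returns the previous mode so the caller can restore it later with
+   _MM_SET_FLUSH_ZERO_MODE.  */
+static __inline unsigned int
+__example_enable_ftz (void)
+{
+  unsigned int __saved = _MM_GET_FLUSH_ZERO_MODE ();
+  _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
+  return __saved;
+}
+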
+/* Create a vector with element 0 as F and the rest zero. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ss (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
+}
+
+/* Create a vector with all four elements equal to F. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_ps (float __F)
+{
+ return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps1 (float __F)
+{
+ return _mm_set1_ps (__F);
+}
+
+/* Create a vector with element 0 as *P and the rest zero. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ss (float const *__P)
+{
+ return _mm_set_ss (*__P);
+}
+
+/* Create a vector with all four elements equal to *P. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load1_ps (float const *__P)
+{
+ return _mm_set1_ps (*__P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps1 (float const *__P)
+{
+ return _mm_load1_ps (__P);
+}
+
+/* Load four SPFP values from P. The address must be 16-byte aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_load_ps (float const *__P)
+{
+ return (__m128) *(__v4sf *)__P;
+}
+
+/* Load four SPFP values from P. The address need not be 16-byte aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadu_ps (float const *__P)
+{
+ return (__m128) __builtin_ia32_loadups (__P);
+}
+
+/* Load four SPFP values in reverse order. The address must be aligned. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadr_ps (float const *__P)
+{
+ __v4sf __tmp = *(__v4sf *)__P;
+ return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
+}
+
+/* Create the vector [Z Y X W]. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
+{
+ return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
+}
+
+/* Create the vector [W X Y Z]. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_setr_ps (float __Z, float __Y, float __X, float __W)
+{
+ return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W };
+}
+
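+/* Editorial note (not part of the original header): _mm_set_ps lists
+   its arguments from the highest element down to element 0, while
+   _mm_setr_ps lists them in memory order, so
+
+     _mm_set_ps  (3.0f, 2.0f, 1.0f, 0.0f)
+     _mm_setr_ps (0.0f, 1.0f, 2.0f, 3.0f)
+
+   both build the vector whose element 0 is 0.0f and element 3 is 3.0f.  */
+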
+/* Stores the lower SPFP value. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ss (float *__P, __m128 __A)
+{
+ *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtss_f32 (__m128 __A)
+{
+ return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
+}
+
+/* Store four SPFP values. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps (float *__P, __m128 __A)
+{
+ *(__v4sf *)__P = (__v4sf)__A;
+}
+
+/* Store four SPFP values. The address need not be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storeu_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_storeups (__P, (__v4sf)__A);
+}
+
+/* Store the lower SPFP value across four words. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store1_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
+ _mm_storeu_ps (__P, __tmp);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_ps1 (float *__P, __m128 __A)
+{
+ _mm_store1_ps (__P, __A);
+}
+
+/* Store four SPFP values in reverse order. The address must be aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_storer_ps (float *__P, __m128 __A)
+{
+ __v4sf __va = (__v4sf)__A;
+ __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
+ _mm_store_ps (__P, __tmp);
+}
+
+/* Sets the low SPFP value of A from the low value of B. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_ss (__m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B);
+}
+
+/* Extracts one of the four words of A. The selector N must be immediate. */
+#ifdef __OPTIMIZE__
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
+{
+ return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pextrw (__m64 const __A, int const __N)
+{
+ return _mm_extract_pi16 (__A, __N);
+}
+#else
+#define _mm_extract_pi16(A, N) \
+ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N)))
+
+#define _m_pextrw(A, N) _mm_extract_pi16(A, N)
+#endif
+
+/* Inserts word D into one of four words of A. The selector N must be
+ immediate. */
+#ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
+{
+ return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
+{
+ return _mm_insert_pi16 (__A, __D, __N);
+}
+#else
+#define _mm_insert_pi16(A, D, N) \
+ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \
+ (int)(D), (int)(N)))
+
+#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N)
+#endif
+
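+/* Editorial example (not part of the original header): replacing a
+   single 16-bit field while leaving the rest of the __m64 intact.  The
+   helper name __example_set_word3 is hypothetical; the selector passed
+   to _mm_insert_pi16 must be a compile-time constant.  */
+static __inline __m64
+__example_set_word3 (__m64 __a, int __d)
+{
+  return _mm_insert_pi16 (__a, __d, 3);
+}
+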
+/* Compute the element-wise maximum of signed 16-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxsw (__m64 __A, __m64 __B)
+{
+ return _mm_max_pi16 (__A, __B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmaxub (__m64 __A, __m64 __B)
+{
+ return _mm_max_pu8 (__A, __B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminsw (__m64 __A, __m64 __B)
+{
+ return _mm_min_pi16 (__A, __B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pminub (__m64 __A, __m64 __B)
+{
+ return _mm_min_pu8 (__A, __B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values. */
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_movemask_pi8 (__m64 __A)
+{
+ return __builtin_ia32_pmovmskb ((__v8qi)__A);
+}
+
+extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmovmskb (__m64 __A)
+{
+ return _mm_movemask_pi8 (__A);
+}
+
+/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values
+ in B and produce the high 16 bits of the 32-bit results. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pmulhuw (__m64 __A, __m64 __B)
+{
+ return _mm_mulhi_pu16 (__A, __B);
+}
+
+/* Return a combination of the four 16-bit values in A. The selector
+ must be an immediate. */
+#ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int const __N)
+{
+ return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pshufw (__m64 __A, int const __N)
+{
+ return _mm_shuffle_pi16 (__A, __N);
+}
+#else
+#define _mm_shuffle_pi16(A, N) \
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+
+#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N)
+#endif
+
+/* Conditionally store byte elements of A into P. The high bit of each
+ byte in the selector N determines whether the corresponding byte from
+ A is stored. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
+{
+ __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_maskmovq (__m64 __A, __m64 __N, char *__P)
+{
+ _mm_maskmove_si64 (__A, __N, __P);
+}
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgb (__m64 __A, __m64 __B)
+{
+ return _mm_avg_pu8 (__A, __B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_pavgw (__m64 __A, __m64 __B)
+{
+ return _mm_avg_pu16 (__A, __B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+ values in A and B. Return the value in the lower 16-bit word; the
+ upper words are cleared. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+ return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B);
+}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_psadbw (__m64 __A, __m64 __B)
+{
+ return _mm_sad_pu8 (__A, __B);
+}
+
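+/* Editorial example (not part of the original header): psadbw is the
+   classic block-matching primitive; the 16-bit sum of the eight
+   absolute differences lands in the low word.  The helper name
+   __example_row_sad is hypothetical (_mm_cvtsi64_si32 comes from
+   mmintrin.h, which this header includes).  */
+static __inline int
+__example_row_sad (__m64 __ref, __m64 __cur)
+{
+  return _mm_cvtsi64_si32 (_mm_sad_pu8 (__ref, __cur));
+}
+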
+/* Loads one cache line from address P to a location "closer" to the
+ processor. The selector I specifies the type of prefetch operation. */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+ __builtin_prefetch (__P, 0, __I);
+}
+#else
+#define _mm_prefetch(P, I) \
+ __builtin_prefetch ((P), 0, (I))
+#endif
+
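+/* Editorial example (not part of the original header): prefetching a
+   future element while processing the current one.  _MM_HINT_T0 asks
+   for the line in all cache levels; the 16-element lookahead is a
+   hypothetical tuning choice, and prefetches past the end of the array
+   are harmless since the instruction never faults.  */
+static __inline float
+__example_sum_with_prefetch (const float *__p, int __n)
+{
+  float __sum = 0.0f;
+  int __i;
+  for (__i = 0; __i < __n; __i++)
+    {
+      _mm_prefetch ((const void *)(__p + __i + 16), _MM_HINT_T0);
+      __sum += __p[__i];
+    }
+  return __sum;
+}
+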
+/* Stores the data in A to the address P without polluting the caches. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_pi (__m64 *__P, __m64 __A)
+{
+ __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
+}
+
+/* Likewise. The address must be 16-byte aligned. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_stream_ps (float *__P, __m128 __A)
+{
+ __builtin_ia32_movntps (__P, (__v4sf)__A);
+}
+
+/* Guarantees that every preceding store is globally visible before
+ any subsequent store. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_sfence (void)
+{
+ __builtin_ia32_sfence ();
+}
+
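+/* Editorial example (not part of the original header): filling a large
+   16-byte-aligned buffer with non-temporal stores and fencing once at
+   the end so the data is globally visible before any later stores.  The
+   helper name __example_fill_nt is hypothetical.  */
+static __inline void
+__example_fill_nt (float *__dst, float __v, int __nquads)
+{
+  __m128 __q = _mm_set1_ps (__v);
+  int __i;
+  for (__i = 0; __i < __nquads; __i++)
+    _mm_stream_ps (__dst + 4 * __i, __q);
+  _mm_sfence ();
+}
+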
+/* The execution of the next instruction is delayed by an
+   implementation-specific amount of time. The instruction does not
+   modify the architectural state. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_pause (void)
+{
+ __asm__ __volatile__ ("rep; nop" : : );
+}
+
+/* Transpose the 4x4 matrix composed of row[0-3]. */
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+do { \
+ __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
+ __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \
+ __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \
+ __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \
+ __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \
+ (row0) = __builtin_ia32_movlhps (__t0, __t1); \
+ (row1) = __builtin_ia32_movhlps (__t1, __t0); \
+ (row2) = __builtin_ia32_movlhps (__t2, __t3); \
+ (row3) = __builtin_ia32_movhlps (__t3, __t2); \
+} while (0)
+
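+/* Editorial example (not part of the original header): the transpose
+   macro updates its four row arguments in place.  The helper name
+   __example_transpose4 is hypothetical; it transposes a row-major 4x4
+   matrix of floats in memory.  */
+static __inline void
+__example_transpose4 (float *__m)
+{
+  __m128 __row0 = _mm_loadu_ps (__m + 0);
+  __m128 __row1 = _mm_loadu_ps (__m + 4);
+  __m128 __row2 = _mm_loadu_ps (__m + 8);
+  __m128 __row3 = _mm_loadu_ps (__m + 12);
+  _MM_TRANSPOSE4_PS (__row0, __row1, __row2, __row3);
+  _mm_storeu_ps (__m + 0, __row0);
+  _mm_storeu_ps (__m + 4, __row1);
+  _mm_storeu_ps (__m + 8, __row2);
+  _mm_storeu_ps (__m + 12, __row3);
+}
+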
+/* For backward source compatibility. */
+#ifdef __SSE2__
+# include <emmintrin.h>
+#endif
+
+#endif /* __SSE__ */
+#endif /* _XMMINTRIN_H_INCLUDED */