diff options
Diffstat (limited to 'gcc-4.9/gcc/config/i386/i386.md')
-rw-r--r-- | gcc-4.9/gcc/config/i386/i386.md | 18044 |
1 files changed, 18044 insertions, 0 deletions
diff --git a/gcc-4.9/gcc/config/i386/i386.md b/gcc-4.9/gcc/config/i386/i386.md new file mode 100644 index 000000000..4a8b46388 --- /dev/null +++ b/gcc-4.9/gcc/config/i386/i386.md @@ -0,0 +1,18044 @@ +;; GCC machine description for IA-32 and x86-64. +;; Copyright (C) 1988-2014 Free Software Foundation, Inc. +;; Mostly by William Schelter. +;; x86_64 support added by Jan Hubicka +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. */ +;; +;; The original PO technology requires these to be ordered by speed, +;; so that assigner will pick the fastest. +;; +;; See file "rtl.def" for documentation on define_insn, match_*, et. al. +;; +;; The special asm out single letter directives following a '%' are: +;; L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. +;; C -- print opcode suffix for set/cmov insn. +;; c -- like C, but print reversed condition +;; F,f -- likewise, but for floating-point. +;; O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", +;; otherwise nothing +;; R -- print the prefix for register names. +;; z -- print the opcode suffix for the size of the current operand. +;; Z -- likewise, with special suffixes for x87 instructions. +;; * -- print a star (in certain assembler syntax) +;; A -- print an absolute memory reference. +;; E -- print address with DImode register names if TARGET_64BIT. 
+;; w -- print the operand as if it's a "word" (HImode) even if it isn't. +;; s -- print a shift double count, followed by the assembler's argument +;; delimiter. +;; b -- print the QImode name of the register for the indicated operand. +;; %b0 would print %al if operands[0] is reg 0. +;; w -- likewise, print the HImode name of the register. +;; k -- likewise, print the SImode name of the register. +;; q -- likewise, print the DImode name of the register. +;; x -- likewise, print the V4SFmode name of the register. +;; t -- likewise, print the V8SFmode name of the register. +;; h -- print the QImode name for a "high" register, either ah, bh, ch or dh. +;; y -- print "st(0)" instead of "st" as a register. +;; d -- print duplicated register operand for AVX instruction. +;; D -- print condition for SSE cmp instruction. +;; P -- if PIC, print an @PLT suffix. +;; p -- print raw symbol name. +;; X -- don't print any sort of PIC '@' suffix for a symbol. +;; & -- print some in-use local-dynamic symbol name. +;; H -- print a memory address offset by 8; used for sse high-parts +;; K -- print HLE lock prefix +;; Y -- print condition for XOP pcom* instruction. +;; + -- print a branch hint as 'cs' or 'ds' prefix +;; ; -- print a semicolon (after prefixes due to bug in older gas). +;; ~ -- print "i" if TARGET_AVX2, "f" otherwise. 
+;; @ -- print a segment register of thread base pointer load +;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode + +(define_c_enum "unspec" [ + ;; Relocation specifiers + UNSPEC_GOT + UNSPEC_GOTOFF + UNSPEC_GOTPCREL + UNSPEC_GOTTPOFF + UNSPEC_TPOFF + UNSPEC_NTPOFF + UNSPEC_DTPOFF + UNSPEC_GOTNTPOFF + UNSPEC_INDNTPOFF + UNSPEC_PLTOFF + UNSPEC_MACHOPIC_OFFSET + UNSPEC_PCREL + + ;; Prologue support + UNSPEC_STACK_ALLOC + UNSPEC_SET_GOT + UNSPEC_SET_RIP + UNSPEC_SET_GOT_OFFSET + UNSPEC_MEMORY_BLOCKAGE + UNSPEC_STACK_CHECK + + ;; TLS support + UNSPEC_TP + UNSPEC_TLS_GD + UNSPEC_TLS_LD_BASE + UNSPEC_TLSDESC + UNSPEC_TLS_IE_SUN + + ;; Other random patterns + UNSPEC_SCAS + UNSPEC_FNSTSW + UNSPEC_SAHF + UNSPEC_PARITY + UNSPEC_FSTCW + UNSPEC_ADD_CARRY + UNSPEC_FLDCW + UNSPEC_REP + UNSPEC_LD_MPIC ; load_macho_picbase + UNSPEC_TRUNC_NOOP + UNSPEC_DIV_ALREADY_SPLIT + UNSPEC_MS_TO_SYSV_CALL + UNSPEC_PAUSE + UNSPEC_LEA_ADDR + UNSPEC_XBEGIN_ABORT + UNSPEC_STOS + + ;; For SSE/MMX support: + UNSPEC_FIX_NOTRUNC + UNSPEC_MASKMOV + UNSPEC_MOVMSK + UNSPEC_RCP + UNSPEC_RSQRT + UNSPEC_PSADBW + + ;; Generic math support + UNSPEC_COPYSIGN + UNSPEC_IEEE_MIN ; not commutative + UNSPEC_IEEE_MAX ; not commutative + + ;; x87 Floating point + UNSPEC_SIN + UNSPEC_COS + UNSPEC_FPATAN + UNSPEC_FYL2X + UNSPEC_FYL2XP1 + UNSPEC_FRNDINT + UNSPEC_FIST + UNSPEC_F2XM1 + UNSPEC_TAN + UNSPEC_FXAM + + ;; x87 Rounding + UNSPEC_FRNDINT_FLOOR + UNSPEC_FRNDINT_CEIL + UNSPEC_FRNDINT_TRUNC + UNSPEC_FRNDINT_MASK_PM + UNSPEC_FIST_FLOOR + UNSPEC_FIST_CEIL + + ;; x87 Double output FP + UNSPEC_SINCOS_COS + UNSPEC_SINCOS_SIN + UNSPEC_XTRACT_FRACT + UNSPEC_XTRACT_EXP + UNSPEC_FSCALE_FRACT + UNSPEC_FSCALE_EXP + UNSPEC_FPREM_F + UNSPEC_FPREM_U + UNSPEC_FPREM1_F + UNSPEC_FPREM1_U + + UNSPEC_C2_FLAG + UNSPEC_FXAM_MEM + + ;; SSP patterns + UNSPEC_SP_SET + UNSPEC_SP_TEST + UNSPEC_SP_TLS_SET + UNSPEC_SP_TLS_TEST + + ;; For ROUND support + UNSPEC_ROUND + + ;; For CRC32 support + UNSPEC_CRC32 + + ;; For BMI 
support + UNSPEC_BEXTR + + ;; For BMI2 support + UNSPEC_PDEP + UNSPEC_PEXT + + ;; For AVX512F support + UNSPEC_KMOV +]) + +(define_c_enum "unspecv" [ + UNSPECV_BLOCKAGE + UNSPECV_STACK_PROBE + UNSPECV_PROBE_STACK_RANGE + UNSPECV_ALIGN + UNSPECV_PROLOGUE_USE + UNSPECV_SPLIT_STACK_RETURN + UNSPECV_CLD + UNSPECV_NOPS + UNSPECV_RDTSC + UNSPECV_RDTSCP + UNSPECV_RDPMC + UNSPECV_LLWP_INTRINSIC + UNSPECV_SLWP_INTRINSIC + UNSPECV_LWPVAL_INTRINSIC + UNSPECV_LWPINS_INTRINSIC + UNSPECV_RDFSBASE + UNSPECV_RDGSBASE + UNSPECV_WRFSBASE + UNSPECV_WRGSBASE + UNSPECV_FXSAVE + UNSPECV_FXRSTOR + UNSPECV_FXSAVE64 + UNSPECV_FXRSTOR64 + UNSPECV_XSAVE + UNSPECV_XRSTOR + UNSPECV_XSAVE64 + UNSPECV_XRSTOR64 + UNSPECV_XSAVEOPT + UNSPECV_XSAVEOPT64 + + ;; For atomic compound assignments. + UNSPECV_FNSTENV + UNSPECV_FLDENV + UNSPECV_FNSTSW + UNSPECV_FNCLEX + + ;; For RDRAND support + UNSPECV_RDRAND + + ;; For RDSEED support + UNSPECV_RDSEED + + ;; For RTM support + UNSPECV_XBEGIN + UNSPECV_XEND + UNSPECV_XABORT + UNSPECV_XTEST + + UNSPECV_NLGR +]) + +;; Constants to represent rounding modes in the ROUND instruction +(define_constants + [(ROUND_FLOOR 0x1) + (ROUND_CEIL 0x2) + (ROUND_TRUNC 0x3) + (ROUND_MXCSR 0x4) + (ROUND_NO_EXC 0x8) + ]) + +;; Constants to represent AVX512F embedded rounding +(define_constants + [(ROUND_NEAREST_INT 0) + (ROUND_NEG_INF 1) + (ROUND_POS_INF 2) + (ROUND_ZERO 3) + (NO_ROUND 4) + (ROUND_SAE 8) + ]) + +;; Constants to represent pcomtrue/pcomfalse variants +(define_constants + [(PCOM_FALSE 0) + (PCOM_TRUE 1) + (COM_FALSE_S 2) + (COM_FALSE_P 3) + (COM_TRUE_S 4) + (COM_TRUE_P 5) + ]) + +;; Constants used in the XOP pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* 
invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + +;; Registers by name. +(define_constants + [(AX_REG 0) + (DX_REG 1) + (CX_REG 2) + (BX_REG 3) + (SI_REG 4) + (DI_REG 5) + (BP_REG 6) + (SP_REG 7) + (ST0_REG 8) + (ST1_REG 9) + (ST2_REG 10) + (ST3_REG 11) + (ST4_REG 12) + (ST5_REG 13) + (ST6_REG 14) + (ST7_REG 15) + (FLAGS_REG 17) + (FPSR_REG 18) + (FPCR_REG 19) + (XMM0_REG 21) + (XMM1_REG 22) + (XMM2_REG 23) + (XMM3_REG 24) + (XMM4_REG 25) + (XMM5_REG 26) + (XMM6_REG 27) + (XMM7_REG 28) + (MM0_REG 29) + (MM1_REG 30) + (MM2_REG 31) + (MM3_REG 32) + (MM4_REG 33) + (MM5_REG 34) + (MM6_REG 35) + (MM7_REG 36) + (R8_REG 37) + (R9_REG 38) + (R10_REG 39) + (R11_REG 40) + (R12_REG 41) + (R13_REG 42) + (R14_REG 43) + (R15_REG 44) + (XMM8_REG 45) + (XMM9_REG 46) + (XMM10_REG 47) + (XMM11_REG 48) + (XMM12_REG 49) + (XMM13_REG 50) + (XMM14_REG 51) + (XMM15_REG 52) + (XMM16_REG 53) + (XMM17_REG 54) + (XMM18_REG 55) + (XMM19_REG 56) + (XMM20_REG 57) + (XMM21_REG 58) + (XMM22_REG 59) + (XMM23_REG 60) + (XMM24_REG 61) + (XMM25_REG 62) + (XMM26_REG 63) + (XMM27_REG 64) + (XMM28_REG 65) + (XMM29_REG 66) + (XMM30_REG 67) + (XMM31_REG 68) + (MASK0_REG 69) + (MASK1_REG 70) + (MASK2_REG 71) + (MASK3_REG 72) + (MASK4_REG 73) + (MASK5_REG 74) + (MASK6_REG 75) + (MASK7_REG 76) + ]) + +;; Insns whose names begin with "x86_" are emitted by gen_FOO calls +;; from i386.c. + +;; In C guard expressions, put expressions which may be compile-time +;; constants first. This allows for better optimization. For +;; example, write "TARGET_64BIT && reload_completed", not +;; "reload_completed && TARGET_64BIT". + + +;; Processor type. +(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem, + atom,slm,generic,amdfam10,bdver1,bdver2,bdver3,bdver4, + btver2" + (const (symbol_ref "ix86_schedule"))) + +;; A basic instruction type. Refinements due to arguments to be +;; provided in other attributes. 
+(define_attr "type" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1, + imul,imulx,idiv,icmp,test,ibr,setcc,icmov, + push,pop,call,callv,leave, + str,bitmanip, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, + fxch,fistp,fisttp,frndint, + sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, + ssemul,sseimul,ssediv,sselog,sselog1, + sseishft,sseishft1,ssecmp,ssecomi, + ssecvt,ssecvt1,sseicvt,sseins, + sseshuf,sseshuf1,ssemuladd,sse4arg, + lwp,mskmov,msklog, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" + (const_string "other")) + +;; Main data type used by the insn +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,TI,OI,XI,SF,DF,XF,TF,V16SF,V8SF,V4DF,V4SF, + V2DF,V2SF,V1DF,V8DF" + (const_string "unknown")) + +;; The CPU unit operations uses. +(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, + fxch,fistp,fisttp,frndint") + (const_string "i387") + (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, + ssemul,sseimul,ssediv,sselog,sselog1, + sseishft,sseishft1,ssecmp,ssecomi, + ssecvt,ssecvt1,sseicvt,sseins, + sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) + +;; The minimum required alignment of vector mode memory operands of the SSE +;; (non-VEX/EVEX) instruction in bits, if it is different from +;; GET_MODE_ALIGNMENT of the operand, otherwise 0. If an instruction has +;; multiple alternatives, this should be conservative maximum of those minimum +;; required alignments. +(define_attr "ssememalign" "" (const_int 0)) + +;; The (bounding maximum) length of an instruction immediate. 
+(define_attr "length_immediate" "" + (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, + bitmanip,imulx,msklog,mskmov") + (const_int 0) + (eq_attr "unit" "i387,sse,mmx") + (const_int 0) + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1, + rotate,rotatex,rotate1,imul,icmp,push,pop") + (symbol_ref "ix86_attr_length_immediate_default (insn, true)") + (eq_attr "type" "imov,test") + (symbol_ref "ix86_attr_length_immediate_default (insn, false)") + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand") + (const_int 4) + (const_int 0)) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand") + (const_int 4) + (const_int 0)) + ;; We don't know the size before shorten_branches. Expect + ;; the instruction to fit for better scheduling. + (eq_attr "type" "ibr") + (const_int 1) + ] + (symbol_ref "/* Update immediate_length and other attributes! */ + gcc_unreachable (),1"))) + +;; The (bounding maximum) length of an instruction address. +(define_attr "length_address" "" + (cond [(eq_attr "type" "str,other,multi,fxch") + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand")) + (const_int 0) + ] + (symbol_ref "ix86_attr_length_address_default (insn)"))) + +;; Set when length prefix is used. +(define_attr "prefix_data16" "" + (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1") + (const_int 0) + (eq_attr "mode" "HI") + (const_int 1) + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF,TI")) + (const_int 1) + ] + (const_int 0))) + +;; Set when string REP prefix is used. +(define_attr "prefix_rep" "" + (cond [(eq_attr "type" "ssemuladd,sse4arg,sseiadd1,ssecvt1") + (const_int 0) + (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + ] + (const_int 0))) + +;; Set when 0f opcode prefix is used. 
+(define_attr "prefix_0f" "" + (if_then_else + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip,msklog,mskmov") + (eq_attr "unit" "sse,mmx")) + (const_int 1) + (const_int 0))) + +;; Set when REX opcode prefix is used. +(define_attr "prefix_rex" "" + (cond [(not (match_test "TARGET_64BIT")) + (const_int 0) + (and (eq_attr "mode" "DI") + (and (eq_attr "type" "!push,pop,call,callv,leave,ibr") + (eq_attr "unit" "!mmx"))) + (const_int 1) + (and (eq_attr "mode" "QI") + (match_test "x86_extended_QIreg_mentioned_p (insn)")) + (const_int 1) + (match_test "x86_extended_reg_mentioned_p (insn)") + (const_int 1) + (and (eq_attr "type" "imovx") + (match_operand:QI 1 "ext_QIreg_operand")) + (const_int 1) + ] + (const_int 0))) + +;; There are also additional prefixes in 3DNOW, SSSE3. +;; ssemuladd,sse4arg default to 0f24/0f25 and DREX byte, +;; sseiadd1,ssecvt1 to 0f7a with no DREX byte. +;; 3DNOW has 0f0f prefix, SSSE3 and SSE4_{1,2} 0f38/0f3a. +(define_attr "prefix_extra" "" + (cond [(eq_attr "type" "ssemuladd,sse4arg") + (const_int 2) + (eq_attr "type" "sseiadd1,ssecvt1") + (const_int 1) + ] + (const_int 0))) + +;; Prefix used: original, VEX, maybe VEX, EVEX or maybe EVEX. +(define_attr "prefix" "orig,vex,maybe_vex,evex,maybe_evex" + (cond [(eq_attr "mode" "OI,V8SF,V4DF") + (const_string "vex") + (eq_attr "mode" "XI,V16SF,V8DF") + (const_string "evex") + ] + (const_string "orig"))) + +;; VEX W bit is used. +(define_attr "prefix_vex_w" "" (const_int 0)) + +;; The length of VEX prefix +;; Only instructions with 0f prefix can have 2 byte VEX prefix, +;; 0f38/0f3a prefixes can't. In i386.md 0f3[8a] is +;; still prefix_0f 1, with prefix_extra 1. 
+(define_attr "length_vex" "" + (if_then_else (and (eq_attr "prefix_0f" "1") + (eq_attr "prefix_extra" "0")) + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, true, true)") + (symbol_ref "ix86_attr_length_vex_default (insn, true, false)")) + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, false, true)") + (symbol_ref "ix86_attr_length_vex_default (insn, false, false)")))) + +;; 4-bytes evex prefix and 1 byte opcode. +(define_attr "length_evex" "" (const_int 5)) + +;; Set when modrm byte is used. +(define_attr "modrm" "" + (cond [(eq_attr "type" "str,leave") + (const_int 0) + (eq_attr "unit" "i387") + (const_int 0) + (and (eq_attr "type" "incdec") + (and (not (match_test "TARGET_64BIT")) + (ior (match_operand:SI 1 "register_operand") + (match_operand:HI 1 "register_operand")))) + (const_int 0) + (and (eq_attr "type" "push") + (not (match_operand 1 "memory_operand"))) + (const_int 0) + (and (eq_attr "type" "pop") + (not (match_operand 0 "memory_operand"))) + (const_int 0) + (and (eq_attr "type" "imov") + (and (not (eq_attr "mode" "DI")) + (ior (and (match_operand 0 "register_operand") + (match_operand 1 "immediate_operand")) + (ior (and (match_operand 0 "ax_reg_operand") + (match_operand 1 "memory_displacement_only_operand")) + (and (match_operand 0 "memory_displacement_only_operand") + (match_operand 1 "ax_reg_operand")))))) + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand")) + (const_int 0) + (and (eq_attr "type" "alu,alu1,icmp,test") + (match_operand 0 "ax_reg_operand")) + (symbol_ref "(get_attr_length_immediate (insn) <= (get_attr_mode (insn) != MODE_QI))") + ] + (const_int 1))) + +;; The (bounding maximum) length of an instruction in bytes. +;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. 
+;; Later we may want to split them and compute proper length as for +;; other insns. +(define_attr "length" "" + (cond [(eq_attr "type" "other,multi,fistp,frndint") + (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr "length_address"))) + (ior (eq_attr "prefix" "evex") + (and (ior (eq_attr "prefix" "maybe_evex") + (eq_attr "prefix" "maybe_vex")) + (match_test "TARGET_AVX512F"))) + (plus (attr "length_evex") + (plus (attr "length_immediate") + (plus (attr "modrm") + (attr "length_address")))) + (ior (eq_attr "prefix" "vex") + (and (ior (eq_attr "prefix" "maybe_vex") + (eq_attr "prefix" "maybe_evex")) + (match_test "TARGET_AVX"))) + (plus (attr "length_vex") + (plus (attr "length_immediate") + (plus (attr "modrm") + (attr "length_address"))))] + (plus (plus (attr "modrm") + (plus (attr "prefix_0f") + (plus (attr "prefix_rex") + (plus (attr "prefix_extra") + (const_int 1))))) + (plus (attr "prefix_rep") + (plus (attr "prefix_data16") + (plus (attr "length_immediate") + (attr "length_address"))))))) + +;; The `memory' attribute is `none' if no memory is referenced, `load' or +;; `store' if there is a simple memory reference therein, `both' if the +;; instruction both loads and stores, or `unknown' if the instruction is +;; complex. 
+ +(define_attr "memory" "none,load,store,both,unknown" + (cond [(eq_attr "type" "other,multi,str,lwp") + (const_string "unknown") + (eq_attr "type" "lea,fcmov,fpspc") + (const_string "none") + (eq_attr "type" "fistp,leave") + (const_string "both") + (eq_attr "type" "frndint") + (const_string "load") + (eq_attr "type" "push") + (if_then_else (match_operand 1 "memory_operand") + (const_string "both") + (const_string "store")) + (eq_attr "type" "pop") + (if_then_else (match_operand 0 "memory_operand") + (const_string "both") + (const_string "load")) + (eq_attr "type" "setcc") + (if_then_else (match_operand 0 "memory_operand") + (const_string "store") + (const_string "none")) + (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp") + (if_then_else (ior (match_operand 0 "memory_operand") + (match_operand 1 "memory_operand")) + (const_string "load") + (const_string "none")) + (eq_attr "type" "ibr") + (if_then_else (match_operand 0 "memory_operand") + (const_string "load") + (const_string "none")) + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand") + (const_string "none") + (const_string "load")) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand") + (const_string "none") + (const_string "load")) + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1,sseshuf1") + (match_operand 1 "memory_operand")) + (const_string "both") + (and (match_operand 0 "memory_operand") + (match_operand 1 "memory_operand")) + (const_string "both") + (match_operand 0 "memory_operand") + (const_string "store") + (match_operand 1 "memory_operand") + (const_string "load") + (and (eq_attr "type" + "!alu1,negnot,ishift1, + imov,imovx,icmp,test,bitmanip, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt, + sselog1,sseshuf1,sseadd1,sseiadd1,sseishft1, + mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog") + (match_operand 2 "memory_operand")) + (const_string "load") + (and (eq_attr "type" "icmov,ssemuladd,sse4arg") + 
(match_operand 3 "memory_operand")) + (const_string "load") + ] + (const_string "none"))) + +;; Indicates if an instruction has both an immediate and a displacement. + +(define_attr "imm_disp" "false,true,unknown" + (cond [(eq_attr "type" "other,multi") + (const_string "unknown") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") + (and (match_operand 0 "memory_displacement_operand") + (match_operand 1 "immediate_operand"))) + (const_string "true") + (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv") + (and (match_operand 0 "memory_displacement_operand") + (match_operand 2 "immediate_operand"))) + (const_string "true") + ] + (const_string "false"))) + +;; Indicates if an FP operation has an integer source. + +(define_attr "fp_int_src" "false,true" + (const_string "false")) + +;; Defines rounding mode of an FP operation. + +(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" + (const_string "any")) + +;; Define attribute to classify add/sub insns that consumes carry flag (CF) +(define_attr "use_carry" "0,1" (const_string "0")) + +;; Define attribute to indicate unaligned ssemov insns +(define_attr "movu" "0,1" (const_string "0")) + +;; Used to control the "enabled" attribute on a per-instruction basis. 
+(define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, + sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, + avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" + (const_string "base")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT") + (eq_attr "isa" "x64_sse4") + (symbol_ref "TARGET_64BIT && TARGET_SSE4_1") + (eq_attr "isa" "x64_sse4_noavx") + (symbol_ref "TARGET_64BIT && TARGET_SSE4_1 && !TARGET_AVX") + (eq_attr "isa" "x64_avx") + (symbol_ref "TARGET_64BIT && TARGET_AVX") + (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT") + (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2") + (eq_attr "isa" "sse2_noavx") + (symbol_ref "TARGET_SSE2 && !TARGET_AVX") + (eq_attr "isa" "sse3") (symbol_ref "TARGET_SSE3") + (eq_attr "isa" "sse4") (symbol_ref "TARGET_SSE4_1") + (eq_attr "isa" "sse4_noavx") + (symbol_ref "TARGET_SSE4_1 && !TARGET_AVX") + (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX") + (eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX") + (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2") + (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2") + (eq_attr "isa" "bmi") (symbol_ref "TARGET_BMI") + (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") + (eq_attr "isa" "fma4") (symbol_ref "TARGET_FMA4") + (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA") + (eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F") + (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") + (eq_attr "isa" "fma_avx512f") + (symbol_ref "TARGET_FMA || TARGET_AVX512F") + ] + (const_int 1))) + +;; Describe a user's asm statement. 
+(define_asm_attributes + [(set_attr "length" "128") + (set_attr "type" "multi")]) + +(define_code_iterator plusminus [plus minus]) + +(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus]) + +(define_code_iterator multdiv [mult div]) + +;; Base name for define_insn +(define_code_attr plusminus_insn + [(plus "add") (ss_plus "ssadd") (us_plus "usadd") + (minus "sub") (ss_minus "sssub") (us_minus "ussub")]) + +;; Base name for insn mnemonic. +(define_code_attr plusminus_mnemonic + [(plus "add") (ss_plus "adds") (us_plus "addus") + (minus "sub") (ss_minus "subs") (us_minus "subus")]) +(define_code_attr plusminus_carry_mnemonic + [(plus "adc") (minus "sbb")]) +(define_code_attr multdiv_mnemonic + [(mult "mul") (div "div")]) + +;; Mark commutative operators as such in constraints. +(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%") + (minus "") (ss_minus "") (us_minus "")]) + +;; Mapping of max and min +(define_code_iterator maxmin [smax smin umax umin]) + +;; Mapping of signed max and min +(define_code_iterator smaxmin [smax smin]) + +;; Mapping of unsigned max and min +(define_code_iterator umaxmin [umax umin]) + +;; Base name for integer and FP insn mnemonic +(define_code_attr maxmin_int [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) +(define_code_attr maxmin_float [(smax "max") (smin "min")]) + +;; Mapping of logic operators +(define_code_iterator any_logic [and ior xor]) +(define_code_iterator any_or [ior xor]) +(define_code_iterator fpint_logic [and xor]) + +;; Base name for insn mnemonic. 
+(define_code_attr logic [(and "and") (ior "or") (xor "xor")]) + +;; Mapping of logic-shift operators +(define_code_iterator any_lshift [ashift lshiftrt]) + +;; Mapping of shift-right operators +(define_code_iterator any_shiftrt [lshiftrt ashiftrt]) + +;; Mapping of all shift operators +(define_code_iterator any_shift [ashift lshiftrt ashiftrt]) + +;; Base name for define_insn +(define_code_attr shift_insn + [(ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")]) + +;; Base name for insn mnemonic. +(define_code_attr shift [(ashift "sll") (lshiftrt "shr") (ashiftrt "sar")]) +(define_code_attr vshift [(ashift "sll") (lshiftrt "srl") (ashiftrt "sra")]) + +;; Mapping of rotate operators +(define_code_iterator any_rotate [rotate rotatert]) + +;; Base name for define_insn +(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")]) + +;; Base name for insn mnemonic. +(define_code_attr rotate [(rotate "rol") (rotatert "ror")]) + +;; Mapping of abs neg operators +(define_code_iterator absneg [abs neg]) + +;; Base name for x87 insn mnemonic. +(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")]) + +;; Used in signed and unsigned widening multiplications. +(define_code_iterator any_extend [sign_extend zero_extend]) + +;; Prefix for insn mnemonic. +(define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")]) + +;; Prefix for define_insn +(define_code_attr u [(sign_extend "") (zero_extend "u")]) +(define_code_attr s [(sign_extend "s") (zero_extend "u")]) +(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")]) + +;; Used in signed and unsigned truncations. +(define_code_iterator any_truncate [ss_truncate truncate us_truncate]) +;; Instruction suffix for truncations. +(define_code_attr trunsuffix [(ss_truncate "s") (truncate "") (us_truncate "us")]) + +;; Used in signed and unsigned fix. +(define_code_iterator any_fix [fix unsigned_fix]) +(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")]) + +;; All integer modes. 
+(define_mode_iterator SWI1248x [QI HI SI DI]) + +;; All integer modes without QImode. +(define_mode_iterator SWI248x [HI SI DI]) + +;; All integer modes without QImode and HImode. +(define_mode_iterator SWI48x [SI DI]) + +;; All integer modes without SImode and DImode. +(define_mode_iterator SWI12 [QI HI]) + +;; All integer modes without DImode. +(define_mode_iterator SWI124 [QI HI SI]) + +;; All integer modes without QImode and DImode. +(define_mode_iterator SWI24 [HI SI]) + +;; Single word integer modes. +(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")]) + +;; Single word integer modes without QImode. +(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")]) + +;; Single word integer modes without QImode and HImode. +(define_mode_iterator SWI48 [SI (DI "TARGET_64BIT")]) + +;; All math-dependent single and double word integer modes. +(define_mode_iterator SDWIM [(QI "TARGET_QIMODE_MATH") + (HI "TARGET_HIMODE_MATH") + SI DI (TI "TARGET_64BIT")]) + +;; Math-dependent single word integer modes. +(define_mode_iterator SWIM [(QI "TARGET_QIMODE_MATH") + (HI "TARGET_HIMODE_MATH") + SI (DI "TARGET_64BIT")]) + +;; Math-dependent integer modes without DImode. +(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH") + (HI "TARGET_HIMODE_MATH") + SI]) + +;; Math-dependent single word integer modes without QImode. +(define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH") + SI (DI "TARGET_64BIT")]) + +;; Double word integer modes. +(define_mode_iterator DWI [(DI "!TARGET_64BIT") + (TI "TARGET_64BIT")]) + +;; GET_MODE_SIZE for selected modes. As GET_MODE_SIZE is not a +;; compile-time constant, it is faster to use <MODE_SIZE> than +;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on +;; command line options just use the GET_MODE_SIZE macro. 
+(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") (TI "16") + (SF "4") (DF "8") (XF "GET_MODE_SIZE (XFmode)") + (V16QI "16") (V32QI "32") (V64QI "64") + (V8HI "16") (V16HI "32") (V32HI "64") + (V4SI "16") (V8SI "32") (V16SI "64") + (V2DI "16") (V4DI "32") (V8DI "64") + (V1TI "16") (V2TI "32") (V4TI "64") + (V2DF "16") (V4DF "32") (V8DF "64") + (V4SF "16") (V8SF "32") (V16SF "64")]) + +;; Double word integer modes as mode attribute. +(define_mode_attr DWI [(QI "HI") (HI "SI") (SI "DI") (DI "TI")]) +(define_mode_attr dwi [(QI "hi") (HI "si") (SI "di") (DI "ti")]) + +;; Half mode for double word integer modes. +(define_mode_iterator DWIH [(SI "!TARGET_64BIT") + (DI "TARGET_64BIT")]) + +;; Instruction suffix for integer modes. +(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) + +;; Pointer size prefix for integer modes (Intel asm dialect) +(define_mode_attr iptrsize [(QI "BYTE") + (HI "WORD") + (SI "DWORD") + (DI "QWORD")]) + +;; Register class for integer modes. +(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")]) + +;; Immediate operand constraint for integer modes. +(define_mode_attr i [(QI "n") (HI "n") (SI "e") (DI "e")]) + +;; General operand constraint for word modes. +(define_mode_attr g [(QI "qmn") (HI "rmn") (SI "rme") (DI "rme")]) + +;; Immediate operand constraint for double integer modes. +(define_mode_attr di [(SI "nF") (DI "e")]) + +;; Immediate operand constraint for shifts. +(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")]) + +;; General operand predicate for integer modes. +(define_mode_attr general_operand + [(QI "general_operand") + (HI "general_operand") + (SI "x86_64_general_operand") + (DI "x86_64_general_operand") + (TI "x86_64_general_operand")]) + +;; General sign/zero extend operand predicate for integer modes. 
+(define_mode_attr general_szext_operand + [(QI "general_operand") + (HI "general_operand") + (SI "x86_64_szext_general_operand") + (DI "x86_64_szext_general_operand")]) + +;; Immediate operand predicate for integer modes. +(define_mode_attr immediate_operand + [(QI "immediate_operand") + (HI "immediate_operand") + (SI "x86_64_immediate_operand") + (DI "x86_64_immediate_operand")]) + +;; Nonmemory operand predicate for integer modes. +(define_mode_attr nonmemory_operand + [(QI "nonmemory_operand") + (HI "nonmemory_operand") + (SI "x86_64_nonmemory_operand") + (DI "x86_64_nonmemory_operand")]) + +;; Operand predicate for shifts. +(define_mode_attr shift_operand + [(QI "nonimmediate_operand") + (HI "nonimmediate_operand") + (SI "nonimmediate_operand") + (DI "shiftdi_operand") + (TI "register_operand")]) + +;; Operand predicate for shift argument. +(define_mode_attr shift_immediate_operand + [(QI "const_1_to_31_operand") + (HI "const_1_to_31_operand") + (SI "const_1_to_31_operand") + (DI "const_1_to_63_operand")]) + +;; Input operand predicate for arithmetic left shifts. 
+(define_mode_attr ashl_input_operand + [(QI "nonimmediate_operand") + (HI "nonimmediate_operand") + (SI "nonimmediate_operand") + (DI "ashldi_input_operand") + (TI "reg_or_pm1_operand")]) + +;; SSE and x87 SFmode and DFmode floating point modes +(define_mode_iterator MODEF [SF DF]) + +;; All x87 floating point modes +(define_mode_iterator X87MODEF [SF DF XF]) + +;; SSE instruction suffix for various modes +(define_mode_attr ssemodesuffix + [(SF "ss") (DF "sd") + (V16SF "ps") (V8DF "pd") + (V8SF "ps") (V4DF "pd") + (V4SF "ps") (V2DF "pd") + (V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q") + (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q") + (V64QI "b") (V16SI "d") (V8DI "q")]) + +;; SSE vector suffix for floating point modes +(define_mode_attr ssevecmodesuffix [(SF "ps") (DF "pd")]) + +;; SSE vector mode corresponding to a scalar mode +(define_mode_attr ssevecmode + [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")]) +(define_mode_attr ssevecmodelower + [(QI "v16qi") (HI "v8hi") (SI "v4si") (DI "v2di") (SF "v4sf") (DF "v2df")]) + +;; Instruction suffix for REX 64bit operators. +(define_mode_attr rex64suffix [(SI "") (DI "{q}")]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. +(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + +;; This mode iterator allows :W to be used for patterns that operate on +;; word_mode sized quantities. +(define_mode_iterator W + [(SI "word_mode == SImode") (DI "word_mode == DImode")]) + +;; This mode iterator allows :PTR to be used for patterns that operate on +;; ptr_mode sized quantities. 
+(define_mode_iterator PTR + [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) + +;; Scheduling descriptions + +(include "pentium.md") +(include "ppro.md") +(include "k6.md") +(include "athlon.md") +(include "bdver1.md") +(include "bdver3.md") +(include "btver2.md") +(include "geode.md") +(include "atom.md") +(include "slm.md") +(include "core2.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; Compare and branch/compare and store instructions. +;; The integer expanders below force the first compare operand into a +;; register when both compare operands are memory, then hand off to +;; ix86_expand_branch or ix86_expand_setcc. + +(define_expand "cbranch<mode>4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SDWIM 1 "nonimmediate_operand") + (match_operand:SDWIM 2 "<general_operand>"))) + (set (pc) (if_then_else + (match_operator 0 "ordered_comparison_operator" + [(reg:CC FLAGS_REG) (const_int 0)]) + (label_ref (match_operand 3)) + (pc)))] + "" +{ + if (MEM_P (operands[1]) && MEM_P (operands[2])) + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstore<mode>4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SWIM 2 "nonimmediate_operand") + (match_operand:SWIM 3 "<general_operand>"))) + (set (match_operand:QI 0 "register_operand") + (match_operator 1 "ordered_comparison_operator" + [(reg:CC FLAGS_REG) (const_int 0)]))] + "" +{ + if (MEM_P (operands[2]) && MEM_P (operands[3])) + operands[2] = force_reg (<MODE>mode, operands[2]); + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + +(define_expand "cmp<mode>_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SWI48 0 "nonimmediate_operand") + (match_operand:SWI48 1 "<general_operand>")))]) + +;; Compare with zero: alternative 0 emits test, alternative 1 emits cmp. +(define_insn "*cmp<mode>_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>,?m<r>") + (match_operand:SWI 1 "const0_operand")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{<imodesuffix>}\t%0, %0 + 
cmp{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "<MODE>")]) + +(define_insn "*cmp<mode>_1" + [(set (reg FLAGS_REG) + (compare (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>") + (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m")))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "<MODE>")]) + +(define_insn "*cmp<mode>_minus_1" + [(set (reg FLAGS_REG) + (compare + (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>") + (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "<MODE>")]) + +;; The *cmpqi_ext_N patterns compare an 8-bit zero_extract taken at +;; position 8 of a register; that operand is printed with the %h modifier. +(define_insn "*cmpqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "nonimmediate_x64nomem_operand" "Q,m") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0)))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "isa" "*,nox64") + (set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "const0_operand")))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t%h0, %h0" + [(set_attr "type" "test") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_expand "cmpqi_ext_3" + [(set (reg:CC FLAGS_REG) + (compare:CC + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "const_int_operand")))]) + +(define_insn "*cmpqi_ext_3" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 
8)) 0) + (match_operand:QI 1 "general_x64nomem_operand" "Qn,m")))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "isa" "*,nox64") + (set_attr "type" "icmp") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_4" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +;; These implement floating point compares. +;; %%% See if we can get away with VOIDmode operands on the actual insns, +;; which would allow mix and match FP modes on the compares. Which is what +;; the old patterns did, but with many more of them. + +(define_expand "cbranchxf4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 1 "nonmemory_operand") + (match_operand:XF 2 "nonmemory_operand"))) + (set (pc) (if_then_else + (match_operator 0 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_80387" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstorexf4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 2 "nonmemory_operand") + (match_operand:XF 3 "nonmemory_operand"))) + (set (match_operand:QI 0 "register_operand") + (match_operator 1 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]))] + "TARGET_80387" +{ + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + +(define_expand "cbranch<mode>4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:MODEF 1 "cmp_fp_expander_operand") + (match_operand:MODEF 2 "cmp_fp_expander_operand"))) + (set (pc) (if_then_else + (match_operator 0 
"ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc)))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstore<mode>4" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:MODEF 2 "cmp_fp_expander_operand") + (match_operand:MODEF 3 "cmp_fp_expander_operand"))) + (set (match_operand:QI 0 "register_operand") + (match_operator 1 "ix86_fp_comparison_operator" + [(reg:CC FLAGS_REG) + (const_int 0)]))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)" +{ + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + +;; Branch and store on an existing flags-register comparison against zero. +(define_expand "cbranchcc4" + [(set (pc) (if_then_else + (match_operator 0 "comparison_operator" + [(match_operand 1 "flags_reg_operand") + (match_operand 2 "const0_operand")]) + (label_ref (match_operand 3)) + (pc)))] + "" +{ + ix86_expand_branch (GET_CODE (operands[0]), + operands[1], operands[2], operands[3]); + DONE; +}) + +(define_expand "cstorecc4" + [(set (match_operand:QI 0 "register_operand") + (match_operator 1 "comparison_operator" + [(match_operand 2 "flags_reg_operand") + (match_operand 3 "const0_operand")]))] + "" +{ + ix86_expand_setcc (operands[0], GET_CODE (operands[1]), + operands[2], operands[3]); + DONE; +}) + + +;; FP compares, step 1: +;; Set the FP condition codes. +;; +;; CCFPmode compare with exceptions +;; CCFPUmode compare with no exceptions + +;; We may not use "#" to split and emit these, since the REG_DEAD notes +;; used to manage the reg stack popping would not be preserved. 
+ +(define_insn "*cmp<mode>_0_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "const0_operand"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, false, false);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) + +;; The *_cc_i387 variants set FLAGS_REG and clobber operand 0. They are +;; split after reload into the UNSPEC_FNSTSW set of operand 0 followed by +;; the UNSPEC_SAHF set of the flags, and need TARGET_SAHF && !TARGET_CMOVE. +(define_insn_and_split "*cmp<mode>_0_cc_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "const0_operand"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) + +(define_insn "*cmpxf_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, false, false);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn_and_split "*cmpxf_cc_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn "*cmp<mode>_i387" + 
[(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, false, false);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) + +(define_insn_and_split "*cmp<mode>_cc_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) + +;; CCFPUmode counterparts; the last argument passed to output_fp_compare +;; is true here and false in the CCFPmode patterns above. +(define_insn "*cmpu<mode>_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFPU + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, false, true);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) + +(define_insn_and_split "*cmpu<mode>_cc_i387" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFPU (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "<MODE>")]) + +;; Compare an x87 register with an SWI24 integer memory operand wrapped in +;; a float_operator. Enabled when TARGET_USE_<SWI24:MODE>MODE_FIOP is set +;; or the function is optimized for size. +(define_insn 
"*cmp<X87MODEF:mode>_<SWI24:mode>_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operator:X87MODEF 3 "float_operator" + [(match_operand:SWI24 2 "memory_operand" "m")]))] + UNSPEC_FNSTSW))] + "TARGET_80387 + && (TARGET_USE_<SWI24:MODE>MODE_FIOP + || optimize_function_for_size_p (cfun))" + "* return output_fp_compare (insn, operands, false, false);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "<SWI24:MODE>")]) + +(define_insn_and_split "*cmp<X87MODEF:mode>_<SWI24:mode>_cc_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:X87MODEF 1 "register_operand" "f") + (match_operator:X87MODEF 3 "float_operator" + [(match_operand:SWI24 2 "memory_operand" "m")]))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE + && (TARGET_USE_<SWI24:MODE>MODE_FIOP + || optimize_function_for_size_p (cfun))" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP + (match_dup 1) + (match_op_dup 3 [(match_dup 2)]))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "<SWI24:MODE>")]) + +;; FP compares, step 2 +;; Move the fpsw to ax. + +(define_insn "x86_fnstsw_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] + "TARGET_80387" + "fnstsw\t%0" + [(set (attr "length") + (symbol_ref "ix86_attr_length_address_default (insn) + 2")) + (set_attr "mode" "SI") + (set_attr "unit" "i387")]) + +;; FP compares, step 3 +;; Get ax into flags, general case. 
+ +;; When HAVE_AS_IX86_SAHF is not defined, 64-bit targets emit the opcode +;; byte 0x9e through ASM_BYTE instead of the sahf mnemonic. +(define_insn "x86_sahf_1" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] + UNSPEC_SAHF))] + "TARGET_SAHF" +{ +#ifndef HAVE_AS_IX86_SAHF + if (TARGET_64BIT) + return ASM_BYTE "0x9e"; + else +#endif + return "sahf"; +} + [(set_attr "length" "1") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +;; Pentium Pro can do steps 1 through 3 in one go. +;; comi*, ucomi*, fcomi*, ficomi*, fucomi* +;; (these i387 instructions set flags directly) + +(define_mode_iterator FPCMP [CCFP CCFPU]) +(define_mode_attr unord [(CCFP "") (CCFPU "u")]) + +;; Alternative 0 compares in x87 registers (type fcmp), alternative 1 in SSE +;; registers (type ssecomi). +(define_insn "*cmpi<FPCMP:unord><MODEF:mode>_mixed" + [(set (reg:FPCMP FLAGS_REG) + (compare:FPCMP + (match_operand:MODEF 0 "register_operand" "f,x") + (match_operand:MODEF 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)" + "* return output_fp_compare (insn, operands, true, + <FPCMP:MODE>mode == CCFPUmode);" + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "<MODEF:MODE>") + (set (attr "prefix_rep") + (if_then_else (eq_attr "type" "ssecomi") + (const_string "0") + (const_string "*"))) + (set (attr "prefix_data16") + (cond [(eq_attr "type" "fcmp") + (const_string "*") + (eq_attr "mode" "DF") + (const_string "1") + ] + (const_string "0"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpi<FPCMP:unord><MODEF:mode>_sse" + [(set (reg:FPCMP FLAGS_REG) + (compare:FPCMP + (match_operand:MODEF 0 "register_operand" "x") + (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)" + "* return output_fp_compare (insn, operands, true, + <FPCMP:MODE>mode == CCFPUmode);" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODEF:MODE>") + 
(set_attr "prefix_rep" "0") + (set (attr "prefix_data16") + (if_then_else (eq_attr "mode" "DF") + (const_string "1") + (const_string "0"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*cmpi<FPCMP:unord><X87MODEF:mode>_i387" + [(set (reg:FPCMP FLAGS_REG) + (compare:FPCMP + (match_operand:X87MODEF 0 "register_operand" "f") + (match_operand:X87MODEF 1 "register_operand" "f")))] + "TARGET_80387 && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)" + "* return output_fp_compare (insn, operands, true, + <FPCMP:MODE>mode == CCFPUmode);" + [(set_attr "type" "fcmp") + (set_attr "mode" "<X87MODEF:MODE>") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "double")]) + +;; Push/pop instructions. + +;; Double-word pushes are always emitted as "#" and rely on being split. +(define_insn "*push<mode>2" + [(set (match_operand:DWI 0 "push_operand" "=<") + (match_operand:DWI 1 "general_no_elim_operand" "riF*o"))] + "" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +;; After reload on 64-bit targets, split TImode pushes whose source is not +;; an SSE register. +(define_split + [(set (match_operand:TI 0 "push_operand") + (match_operand:TI 1 "general_operand"))] + "TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushdi2_rex64" + [(set (match_operand:DI 0 "push_operand" "=<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + "TARGET_64BIT" + "@ + push{q}\t%1 + #" + [(set_attr "type" "push,multi") + (set_attr "mode" "DI")]) + +;; Convert impossible pushes of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, push sign extended lower part first and then overwrite +;; upper part by 32bit move. 
+(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "push_operand") + (match_operand:DI 1 "immediate_operand"))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))]) + +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. +(define_peephole2 + [(set (match_operand:DI 0 "push_operand") + (match_operand:DI 1 "immediate_operand"))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); + + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (4))); +}) + +;; Splitter form of the peephole2 above. It waits for epilogue_completed +;; when optimizing with flag_peephole2 set, otherwise for reload_completed. +;; The second emitted set stores operand 3 to the SImode word at +;; stack pointer + 4. +(define_split + [(set (match_operand:DI 0 "push_operand") + (match_operand:DI 1 "immediate_operand"))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] +{ + split_double_mode (DImode, &operands[1], 1, &operands[2], &operands[3]); + + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (4))); +}) + +;; After reload on 32-bit targets, split DImode pushes whose source is +;; neither an MMX nor an SSE register. +(define_split + [(set (match_operand:DI 0 "push_operand") + (match_operand:DI 1 "general_operand"))] + "!TARGET_64BIT && reload_completed + && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*pushsi2" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; emit_push_insn when it calls move_by_pieces requires an insn to +;; "push a byte/word". But actually we use pushl, which has the effect +;; of rounding the amount pushed up to a word. + +;; For TARGET_64BIT we always round up to 8 bytes. 
+(define_insn "*push<mode>2_rex64" + [(set (match_operand:SWI124 0 "push_operand" "=X") + (match_operand:SWI124 1 "nonmemory_no_elim_operand" "r<i>"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +;; On 32-bit targets SWI12 pushes are emitted as push{l} of the %k1 form +;; of the operand. +(define_insn "*push<mode>2" + [(set (match_operand:SWI12 0 "push_operand" "=X") + (match_operand:SWI12 1 "nonmemory_no_elim_operand" "rn"))] + "!TARGET_64BIT" + "push{l}\t%k1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; Word-mode push that additionally clobbers (mem:BLK (scratch)). +(define_insn "*push<mode>2_prologue" + [(set (match_operand:W 0 "push_operand" "=<") + (match_operand:W 1 "general_no_elim_operand" "r<i>*m")) + (clobber (mem:BLK (scratch)))] + "" + "push{<imodesuffix>}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pop<mode>1" + [(set (match_operand:W 0 "nonimmediate_operand" "=r*m") + (match_operand:W 1 "pop_operand" ">"))] + "" + "pop{<imodesuffix>}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "<MODE>")]) + +;; Word-mode pop that additionally clobbers (mem:BLK (scratch)). +(define_insn "*pop<mode>1_epilogue" + [(set (match_operand:W 0 "nonimmediate_operand" "=r*m") + (match_operand:W 1 "pop_operand" ">")) + (clobber (mem:BLK (scratch)))] + "" + "pop{<imodesuffix>}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "<MODE>")]) + +;; Push and pop of the flags register (pushf/popf). +(define_insn "*pushfl<mode>2" + [(set (match_operand:W 0 "push_operand" "=<") + (match_operand:W 1 "flags_reg_operand"))] + "" + "pushf{<imodesuffix>}" + [(set_attr "type" "push") + (set_attr "mode" "<MODE>")]) + +(define_insn "*popfl<mode>1" + [(set (match_operand:W 0 "flags_reg_operand") + (match_operand:W 1 "pop_operand" ">"))] + "" + "popf{<imodesuffix>}" + [(set_attr "type" "pop") + (set_attr "mode" "<MODE>")]) + + +;; Move instructions. + +(define_expand "movxi" + [(set (match_operand:XI 0 "nonimmediate_operand") + (match_operand:XI 1 "general_operand"))] + "TARGET_AVX512F" + "ix86_expand_move (XImode, operands); DONE;") + +;; Reload patterns to support multi-word load/store +;; with non-offsettable address. 
+(define_expand "reload_noff_store" + [(parallel [(match_operand 0 "memory_operand" "=m") + (match_operand 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "=&r")])] + "TARGET_64BIT" +{ + rtx mem = operands[0]; + rtx addr = XEXP (mem, 0); + + emit_move_insn (operands[2], addr); + mem = replace_equiv_address_nv (mem, operands[2]); + + emit_insn (gen_rtx_SET (VOIDmode, mem, operands[1])); + DONE; +}) + +;; Load counterpart: the address of the memory operand is copied into +;; operand 2, the memory reference is rewritten to use that register, and +;; operand 0 is then set from it. +(define_expand "reload_noff_load" + [(parallel [(match_operand 0 "register_operand" "=r") + (match_operand 1 "memory_operand" "m") + (match_operand:DI 2 "register_operand" "=r")])] + "TARGET_64BIT" +{ + rtx mem = operands[1]; + rtx addr = XEXP (mem, 0); + + emit_move_insn (operands[2], addr); + mem = replace_equiv_address_nv (mem, operands[2]); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], mem)); + DONE; +}) + +(define_expand "movoi" + [(set (match_operand:OI 0 "nonimmediate_operand") + (match_operand:OI 1 "general_operand"))] + "TARGET_AVX" + "ix86_expand_move (OImode, operands); DONE;") + +;; 64-bit targets expand through ix86_expand_move; other targets use +;; ix86_expand_vector_move. +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "nonimmediate_operand"))] + "TARGET_64BIT || TARGET_SSE" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) + +;; This expands to what emit_move_complex would generate if we didn't +;; have a movti pattern. Having this avoids problems with reload on +;; 32-bit targets when SSE is present, but doesn't seem to be harmful +;; to have around all the time. 
+(define_expand "movcdi" + [(set (match_operand:CDI 0 "nonimmediate_operand") + (match_operand:CDI 1 "general_operand"))] + "" +{ + if (push_operand (operands[0], CDImode)) + emit_move_complex_push (CDImode, operands[0], operands[1]); + else + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +(define_expand "mov<mode>" + [(set (match_operand:SWI1248x 0 "nonimmediate_operand") + (match_operand:SWI1248x 1 "general_operand"))] + "" + "ix86_expand_move (<MODE>mode, operands); DONE;") + +;; After reload, zero a register with a 32-bit xor of the register with +;; itself; the pattern clobbers FLAGS_REG. +(define_insn "*mov<mode>_xor" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (match_operand:SWI48 1 "const0_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{l}\t%k0, %k0" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +;; After reload, load constm1_rtx with an or instruction; the pattern +;; clobbers FLAGS_REG. +(define_insn "*mov<mode>_or" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (match_operand:SWI48 1 "const_int_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && operands[1] == constm1_rtx" + "or{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>") + (set_attr "length_immediate" "1")]) + +;; XImode move: alternative 0 loads a standard SSE constant; alternatives 1 +;; and 2 use vmovdqu32 when either operand is misaligned, else vmovdqa32. +(define_insn "*movxi_internal_avx512f" + [(set (match_operand:XI 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:XI 1 "vector_move_operand" "C ,xm,x"))] + "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return standard_sse_constant_opcode (insn, operands[1]); + case 1: + case 2: + if (misaligned_operand (operands[0], XImode) + || misaligned_operand (operands[1], XImode)) + return "vmovdqu32\t{%1, %0|%0, %1}"; + else + return "vmovdqa32\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "evex") + (set_attr "mode" "XI")]) + +;; OImode move: the mode attribute (V8SF or OI) selects between the +;; vmovups/vmovaps and vmovdqu/vmovdqa spellings. +(define_insn "*movoi_internal_avx" + [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x ,m") + (match_operand:OI 1 "vector_move_operand" "C ,xm,x"))] + "TARGET_AVX 
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + if (misaligned_operand (operands[0], OImode) + || misaligned_operand (operands[1], OImode)) + { + if (get_attr_mode (insn) == MODE_V8SF) + return "vmovups\t{%1, %0|%0, %1}"; + else + return "vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V8SF) + return "vmovaps\t{%1, %0|%0, %1}"; + else + return "vmovdqa\t{%1, %0|%0, %1}"; + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "vex") + (set (attr "mode") + (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (const_string "V8SF") + (and (eq_attr "alternative" "2") + (match_test "TARGET_SSE_TYPELESS_STORES")) + (const_string "V8SF") + ] + (const_string "OI")))]) + +;; TImode move: alternatives 0 and 1 are type multi, restricted to x64 by +;; the isa attribute, and emitted as "#"; the remaining alternatives are +;; SSE constant loads and SSE moves. +(define_insn "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,x,x ,m") + (match_operand:TI 1 "general_operand" "riFo,re,C,xm,x"))] + "(TARGET_64BIT || TARGET_SSE) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_MULTI: + return "#"; + + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. 
*/ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "x64,x64,*,*,*") + (set_attr "type" "multi,multi,sselog1,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1") + (const_string "DI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (const_string "V4SF") + (and (eq_attr "alternative" "4") + (match_test "TARGET_SSE_TYPELESS_STORES")) + (const_string "V4SF") + (match_test "TARGET_AVX") + (const_string "TI") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + ] + (const_string "TI")))]) + +;; After reload, split TImode moves in which neither operand is an SSE +;; register. +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] + "reload_completed + && !SSE_REG_P (operands[0]) && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; DImode move. The alternative numbers used by the attributes below index +;; the comma-separated constraint lists, starting at 0. +(define_insn "*movdi_internal" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi") + (match_operand:DI 1 "general_operand" + "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_MULTI: + return "#"; + + case TYPE_MMX: + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + /* Handle broken assemblers that require movd instead of movq. 
*/ + if (!HAVE_AS_IX86_INTERUNIT_MOVQ + && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) + return "movd\t{%1, %0|%0, %1}"; + return "movq\t{%1, %0|%0, %1}"; + + case TYPE_SSELOG1: + if (GENERAL_REG_P (operands[0])) + return "%vpextrq\t{$0, %1, %0|%0, %1, 0}"; + + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_DI: + /* Handle broken assemblers that require movd instead of movq. */ + if (!HAVE_AS_IX86_INTERUNIT_MOVQ + && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) + return "%vmovd\t{%1, %0|%0, %1}"; + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; + + case MODE_V2SF: + gcc_assert (!TARGET_AVX); + return "movlps\t{%1, %0|%0, %1}"; + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } + + case TYPE_SSECVT: + if (SSE_REG_P (operands[0])) + return "movq2dq\t{%1, %0|%0, %1}"; + else + return "movdq2q\t{%1, %0|%0, %1}"; + + case TYPE_LEA: + return "lea{q}\t{%E1, %0|%0, %E1}"; + + case TYPE_IMOV: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else if (which_alternative == 4) + return "movabs{q}\t{%1, %0|%0, %1}"; + else if (ix86_use_lea_for_mov (insn, operands)) + return "lea{q}\t{%E1, %0|%0, %E1}"; + else + return "mov{q}\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "0,1") + (const_string "nox64") + (eq_attr "alternative" "2,3,4,5,10,11,16,18") + (const_string "x64") + (eq_attr "alternative" "17") + (const_string "x64_sse4") + ] + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "0,1") + (const_string "multi") + (eq_attr "alternative" "6") + (const_string "mmx") + (eq_attr "alternative" "7,8,9,10,11") + (const_string "mmxmov") + 
(eq_attr "alternative" "12,17") + (const_string "sselog1") + (eq_attr "alternative" "13,14,15,16,18") + (const_string "ssemov") + (eq_attr "alternative" "19,20") + (const_string "ssecvt") + (match_operand 1 "pic_32bit_operand") + (const_string "lea") + ] + (const_string "imov"))) + (set (attr "modrm") + (if_then_else + (and (eq_attr "alternative" "4") (eq_attr "type" "imov")) + (const_string "0") + (const_string "*"))) + (set (attr "length_immediate") + (cond [(and (eq_attr "alternative" "4") (eq_attr "type" "imov")) + (const_string "8") + (eq_attr "alternative" "17") + (const_string "1") + ] + (const_string "*"))) + (set (attr "prefix_rex") + (if_then_else (eq_attr "alternative" "10,11,16,17,18") + (const_string "1") + (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "17") + (const_string "1") + (const_string "*"))) + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "prefix_data16") + (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI")) + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (eq_attr "alternative" "12,13") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (const_string "V4SF") + (match_test "TARGET_AVX") + (const_string "TI") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + ] + (const_string "TI")) + + (and (eq_attr "alternative" "14,15") + (not (match_test "TARGET_SSE2"))) + (const_string "V2SF") + (eq_attr "alternative" "17") + (const_string "TI") + ] + (const_string "DI")))]) + +;; After reload on 32-bit targets, split DImode moves that involve neither +;; MMX nor SSE registers. +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "general_operand"))] + "!TARGET_64BIT && reload_completed + && !(MMX_REG_P 
(operands[0]) || SSE_REG_P (operands[0])) + && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; SImode move. The alternative numbers used by the attributes below index +;; the comma-separated constraint lists, starting at 0. +(define_insn "*movsi_internal" + [(set (match_operand:SI 0 "nonimmediate_operand" + "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi") + (match_operand:SI 1 "general_operand" + "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + if (GENERAL_REG_P (operands[0])) + return "%vpextrd\t{$0, %1, %0|%0, %1, 0}"; + + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_SI: + return "%vmovd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_XI: + return "vmovdqa32\t{%g1, %g0|%g0, %g1}"; + + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + + case MODE_SF: + gcc_assert (!TARGET_AVX); + return "movss\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } + + case TYPE_MMX: + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + switch (get_attr_mode (insn)) + { + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_SI: + return "movd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } + + case TYPE_LEA: + return "lea{l}\t{%E1, %0|%0, %E1}"; + + case TYPE_IMOV: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + if (ix86_use_lea_for_mov (insn, operands)) + return "lea{l}\t{%E1, %0|%0, %E1}"; + else + return "mov{l}\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "isa") + (if_then_else (eq_attr "alternative" "11") + (const_string "sse4") + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "mmx") + (eq_attr "alternative" "3,4,5") + (const_string "mmxmov") + (eq_attr "alternative" "6,11") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,12") + (const_string "ssemov") + (match_operand 1 
"pic_32bit_operand") + (const_string "lea") + ] + (const_string "imov"))) + (set (attr "length_immediate") + (if_then_else (eq_attr "alternative" "11") + (const_string "1") + (const_string "*"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "11") + (const_string "1") + (const_string "*"))) + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "prefix_data16") + (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "XI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (const_string "V4SF") + (match_test "TARGET_AVX") + (const_string "TI") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + ] + (const_string "TI")) + + (and (eq_attr "alternative" "8,9") + (not (match_test "TARGET_SSE2"))) + (const_string "SF") + (eq_attr "alternative" "11") + (const_string "TI") + ] + (const_string "SI")))]) + +;; HImode move wrapped in UNSPEC_KMOV. The destination uses constraint k +;; and the source is either rm or k. Requires TARGET_AVX512F. +(define_insn "kmovw" + [(set (match_operand:HI 0 "nonimmediate_operand" "=k,k") + (unspec:HI + [(match_operand:HI 1 "nonimmediate_operand" "rm,k")] + UNSPEC_KMOV))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) && TARGET_AVX512F" + "@ + kmovw\t{%k1, %0|%0, %k1} + kmovw\t{%1, %0|%0, %1}"; + [(set_attr "mode" "HI") + (set_attr "type" "mskmov") + (set_attr "prefix" "vex")]) + + +(define_insn "*movhi_internal" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k,rm") + (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,k,k"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, 
+ though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + + case TYPE_MSKMOV: + /* Mask-register moves; the type attribute below assigns mskmov to alternatives 4, 5 and 6 only. */ + switch (which_alternative) + { + case 4: return "kmovw\t{%k1, %0|%0, %k1}"; + case 5: return "kmovw\t{%1, %0|%0, %1}"; + case 6: return "kmovw\t{%1, %k0|%k0, %1}"; + default: gcc_unreachable (); + } + + default: + /* Plain integer move: 32-bit form when the mode attribute is SI, 16-bit form otherwise. */ + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "4,5,6") + (const_string "mskmov") + /* The mask alternatives are tested first. cond takes the first entry that matches, so testing optimize_function_for_size_p ahead of them would type the k-register moves as imov and the output code above would print a plain mov for them. */ + (match_test "optimize_function_for_size_p (cfun)") + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "imov") + (and (match_test "TARGET_MOVX") + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "4,5,6") + (const_string "vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_HIMODE_MATH")))) + (const_string "SI") + ] + (const_string "HI")))]) + +;; Situation is quite tricky about when to choose full sized (SImode) move +;; over QImode moves. For Q_REG -> Q_REG move we use full size only for +;; partial register dependency machines (such as AMD Athlon), where QImode +;; moves issue extra dependency and for partial register stalls machines +;; that don't use QImode patterns (and QImode move cause stall on the next +;; instruction). 
+;; +;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial +;; register stall machines, where we use QImode instructions, since a +;; partial register stall can be caused there. Then we use movzx. + +(define_insn "*movqi_internal" + [(set (match_operand:QI 0 "nonimmediate_operand" + "=q,q ,q ,r,r ,?r,m ,k,k,r") + (match_operand:QI 1 "general_operand" + "q ,qn,qm,q,rn,qm,qn,r ,k,k"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + + case TYPE_MSKMOV: + /* Mask-register moves; the type attribute below assigns mskmov to alternatives 7, 8 and 9 only. */ + switch (which_alternative) + { + case 7: return "kmovw\t{%k1, %0|%0, %k1}"; + case 8: return "kmovw\t{%1, %0|%0, %1}"; + case 9: return "kmovw\t{%1, %k0|%k0, %1}"; + default: gcc_unreachable (); + } + + default: + /* Plain integer move: 32-bit form when the mode attribute is SI, byte form otherwise. */ + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "7,8,9") + (const_string "mskmov") + /* The mask alternatives are tested first. cond takes the first entry that matches, so testing optimize_function_for_size_p ahead of them would type the k-register moves as imov and the output code above would print a plain mov for them. */ + (and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand"))) + (const_string "imovx") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "imov") + (and (eq_attr "alternative" "3") + (ior (not (match_test "TARGET_PARTIAL_REG_STALL")) + (not (match_test "TARGET_QIMODE_MATH")))) + (const_string "imov") + (eq_attr "alternative" "3,5") + (const_string "imovx") + (and (match_test "TARGET_MOVX") + (eq_attr "alternative" "2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "7,8,9") + (const_string "vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,5") + (const_string "SI") + (eq_attr "alternative" "6") + (const_string "QI") + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (match_test 
"TARGET_PARTIAL_REG_DEPENDENCY") + (and (not (match_test "optimize_function_for_size_p (cfun)")) + (not (match_test "TARGET_PARTIAL_REG_STALL")))))) + (const_string "SI") + ;; Avoid partial register stalls when not using QImode arithmetic + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (match_test "TARGET_PARTIAL_REG_STALL") + (not (match_test "TARGET_QIMODE_MATH"))))) + (const_string "SI") + ] + (const_string "QI")))]) + +;; Stores and loads of ax to arbitrary constant address. +;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabs<mode>_1" + [(set (mem:SWI1248x (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SWI1248x 1 "nonmemory_operand" "a,r<i>"))] + "TARGET_LP64 && ix86_check_movabs (insn, 0)" + "@ + movabs{<imodesuffix>}\t{%1, %P0|[%P0], %1} + mov{<imodesuffix>}\t{%1, %a0|<iptrsize> PTR %a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "<MODE>")]) + +(define_insn "*movabs<mode>_2" + [(set (match_operand:SWI1248x 0 "register_operand" "=a,r") + (mem:SWI1248x (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_LP64 && ix86_check_movabs (insn, 1)" + "@ + movabs{<imodesuffix>}\t{%P1, %0|%0, [%P1]} + mov{<imodesuffix>}\t{%a1, %0|%0, <iptrsize> PTR %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "<MODE>")]) + +(define_insn "*swap<mode>" + [(set (match_operand:SWI48 0 "register_operand" "+r") + (match_operand:SWI48 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "" + "xchg{<imodesuffix>}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "<MODE>") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr 
"amdfam10_decode" "double") + (set_attr "bdver1_decode" "double")]) + +(define_insn "*swap<mode>_1" + [(set (match_operand:SWI12 0 "register_operand" "+r") + (match_operand:SWI12 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "double")]) + +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL +;; is disabled for AMDFAM10 +(define_insn "*swap<mode>_2" + [(set (match_operand:SWI12 0 "register_operand" "+<r>") + (match_operand:SWI12 1 "register_operand" "+<r>")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{<imodesuffix>}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "<MODE>") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movstrict<mode>" + [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand")) + (match_operand:SWI12 1 "general_operand"))] + "" +{ + if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) + FAIL; + if (GET_CODE (operands[0]) == SUBREG + && GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0]))) != MODE_INT) + FAIL; + /* Don't generate memory->memory moves, go through a register */ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[1] = force_reg (<MODE>mode, operands[1]); +}) + +(define_insn "*movstrict<mode>_1" + [(set (strict_low_part + (match_operand:SWI12 0 "nonimmediate_operand" "+<r>m,<r>")) + (match_operand:SWI12 1 "general_operand" "<r>n,m"))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "mov{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "<MODE>")]) + +(define_insn "*movstrict<mode>_xor" + [(set (strict_low_part 
(match_operand:SWI12 0 "register_operand" "+<r>")) + (match_operand:SWI12 1 "const0_operand")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{<imodesuffix>}\t%0, %0" + [(set_attr "type" "alu1") + (set_attr "mode" "<MODE>") + (set_attr "length_immediate" "0")]) + +(define_insn "*mov<mode>_extv_1" + [(set (match_operand:SWI24 0 "register_operand" "=R") + (sign_extract:SWI24 (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extv_1" + [(set (match_operand:QI 0 "nonimmediate_x64nomem_operand" "=Q,?R,m") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q,Q") + (const_int 8) + (const_int 8)))] + "" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set_attr "isa" "*,*,nox64") + (set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand") + (ior (not (match_operand:QI 0 "QIreg_operand")) + (match_test "TARGET_MOVX"))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*mov<mode>_extzv_1" + [(set (match_operand:SWI48 0 "register_operand" "=R") + (zero_extract:SWI48 (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extzv_2" + [(set (match_operand:QI 0 "nonimmediate_x64nomem_operand" "=Q,?R,m") + (subreg:QI + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q,Q") + (const_int 8) + (const_int 8)) 0))] + "" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set_attr "isa" "*,*,nox64") 
+ (set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand") + (ior (not (match_operand:QI 0 "QIreg_operand")) + (match_test "TARGET_MOVX"))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "mov<mode>_insv_1" + [(set (zero_extract:SWI48 (match_operand 0 "ext_register_operand" "+Q,Q") + (const_int 8) + (const_int 8)) + (match_operand:SWI48 1 "general_x64nomem_operand" "Qn,m"))] + "" +{ + if (CONST_INT_P (operands[1])) + operands[1] = simplify_gen_subreg (QImode, operands[1], <MODE>mode, 0); + return "mov{b}\t{%b1, %h0|%h0, %b1}"; +} + [(set_attr "isa" "*,nox64") + (set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movqi_insv_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +;; Floating point push instructions. + +(define_insn "*pushtf" + [(set (match_operand:TF 0 "push_operand" "=<,<") + (match_operand:TF 1 "general_no_elim_operand" "x,*roF"))] + "TARGET_64BIT || TARGET_SSE" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "isa" "*,x64") + (set_attr "type" "multi") + (set_attr "unit" "sse,*") + (set_attr "mode" "TF,DI")]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:TF 0 "push_operand") + (match_operand:TF 1 "sse_reg_operand"))] + "TARGET_SSE && reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) + (set (match_dup 0) (match_dup 1))] +{ + /* Preserve memory attributes. 
*/ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + +(define_insn "*pushxf" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f,Yx*roF"))] + "" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "1") + (if_then_else (match_test "TARGET_64BIT") + (const_string "DI") + (const_string "SI")) + ] + (const_string "XF")))]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:XF 0 "push_operand") + (match_operand:XF 1 "fp_register_operand"))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode)); + /* Preserve memory attributes. */ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + +(define_insn "*pushdf" + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,Yd*roF,rmF,x"))] + "" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "isa" "*,nox64,x64,sse2") + (set_attr "type" "multi") + (set_attr "unit" "i387,*,*,sse") + (set_attr "mode" "DF,SI,DI,DF")]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:DF 0 "push_operand") + (match_operand:DF 1 "any_fp_register_operand"))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (match_dup 0) (match_dup 1))] +{ + /* Preserve memory attributes. 
*/ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] + "TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,DI,SF")]) + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rmF,x"))] + "!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,SI,SF")]) + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:SF 0 "push_operand") + (match_operand:SF 1 "any_fp_register_operand"))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (match_dup 0) (match_dup 1))] +{ + rtx op = XEXP (operands[0], 0); + if (GET_CODE (op) == PRE_DEC) + { + gcc_assert (!TARGET_64BIT); + op = GEN_INT (-4); + } + else + { + op = XEXP (XEXP (op, 1), 1); + gcc_assert (CONST_INT_P (op)); + } + operands[2] = op; + /* Preserve memory attributes. 
*/ + operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); +}) + +(define_split + [(set (match_operand:SF 0 "push_operand") + (match_operand:SF 1 "memory_operand"))] + "reload_completed + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))]) + +(define_split + [(set (match_operand 0 "push_operand") + (match_operand 1 "general_operand"))] + "reload_completed + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode) + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Floating point move instructions. + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand") + (match_operand:TF 1 "nonimmediate_operand"))] + "TARGET_64BIT || TARGET_SSE" + "ix86_expand_move (TFmode, operands); DONE;") + +(define_expand "mov<mode>" + [(set (match_operand:X87MODEF 0 "nonimmediate_operand") + (match_operand:X87MODEF 1 "general_operand"))] + "" + "ix86_expand_move (<MODE>mode, operands); DONE;") + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o") + (match_operand:TF 1 "general_operand" "C ,xm,x,*roF,*rC"))] + "(TARGET_64BIT || TARGET_SSE) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (!can_create_pseudo_p () + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && standard_sse_constant_p (operands[1]) + && !memory_operand (operands[0], TFmode)) + || (!TARGET_MEMORY_MISMATCH_STALL + && memory_operand (operands[0], TFmode)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + /* Handle misaligned load/store since we + don't have movmisaligntf pattern. 
*/ + if (misaligned_operand (operands[0], TFmode) + || misaligned_operand (operands[1], TFmode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + + case TYPE_MULTI: + return "#"; + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,*,*,x64,x64") + (set_attr "type" "sselog1,ssemov,ssemov,multi,multi") + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4") + (const_string "DI") + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (match_test "TARGET_SSE_TYPELESS_STORES")) + (const_string "V4SF") + (match_test "TARGET_AVX") + (const_string "TI") + (ior (not (match_test "TARGET_SSE2")) + (match_test "optimize_function_for_size_p (cfun)")) + (const_string "V4SF") + ] + (const_string "TI")))]) + +;; Possible store forwarding (partial memory) stall in alternatives 4 and 5. 
+(define_insn "*movxf_internal" /* XFmode move; per the type attribute below, alternatives 0-2 are fmov and alternatives 3-5 are multi. */ + [(set (match_operand:XF 0 "nonimmediate_operand" + "=f,m,f,?Yx*r ,!o ,!o") + (match_operand:XF 1 "general_operand" + "fm,f,G,Yx*roF,Yx*rF,Yx*rC"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && (!can_create_pseudo_p () + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1]) > 0 + && !memory_operand (operands[0], XFmode)) + || (!TARGET_MEMORY_MISMATCH_STALL + && memory_operand (operands[0], XFmode)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_FMOV: /* alternatives 0, 1 and 2 */ + if (which_alternative == 2) /* source constraint G: the opcode is chosen by standard_80387_constant_opcode */ + return standard_80387_constant_opcode (operands[1]); + return output_387_reg_move (insn, operands); + + case TYPE_MULTI: /* alternatives 3, 4 and 5 */ + return "#"; /* by GCC convention this template marks an insn that has to be split rather than printed */ + + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,*,*,*,nox64,x64") /* alternative 4 is nox64-only and alternative 5 is x64-only */ + (set_attr "type" "fmov,fmov,fmov,multi,multi,multi") + (set (attr "mode") /* multi alternatives get DI under TARGET_64BIT and SI otherwise; all others get XF */ + (cond [(eq_attr "alternative" "3,4,5") + (if_then_else (match_test "TARGET_64BIT") + (const_string "DI") + (const_string "SI")) + ] + (const_string "XF")))]) + +;; Possible store forwarding (partial memory) stall in alternative 4. 
+(define_insn "*movdf_internal" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=Yf*f,m ,Yf*f,?Yd*r ,!o ,?r,?m,?r,?r,v,v,v,m,*x,*x,*x,m ,r ,Yi") + (match_operand:DF 1 "general_operand" + "Yf*fm,Yf*f,G ,Yd*roF,Yd*rF,rm,rC,C ,F ,C,v,m,v,C ,*x,m ,*x,Yj,r"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && (!can_create_pseudo_p () + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && ((!(TARGET_SSE2 && TARGET_SSE_MATH) + && standard_80387_constant_p (operands[1]) > 0) + || (TARGET_SSE2 && TARGET_SSE_MATH + && standard_sse_constant_p (operands[1]))) + && !memory_operand (operands[0], DFmode)) + || ((TARGET_64BIT || !TARGET_MEMORY_MISMATCH_STALL) + && memory_operand (operands[0], DFmode)))" +{ + switch (get_attr_type (insn)) + { + case TYPE_FMOV: + if (which_alternative == 2) + return standard_80387_constant_opcode (operands[1]); + return output_387_reg_move (insn, operands); + + case TYPE_MULTI: + return "#"; + + case TYPE_IMOV: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%1, %k0|%k0, %1}"; + else if (which_alternative == 8) + return "movabs{q}\t{%1, %0|%0, %1}"; + else + return "mov{q}\t{%1, %0|%0, %1}"; + + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_DF: + if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + return "%vmovsd\t{%1, %0|%0, %1}"; + + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V8DF: + return "vmovapd\t{%g1, %g0|%g0, %g1}"; + case MODE_V2DF: + return "%vmovapd\t{%1, %0|%0, %1}"; + + case MODE_V2SF: + gcc_assert (!TARGET_AVX); + return "movlps\t{%1, %0|%0, %1}"; + case MODE_V1DF: + gcc_assert (!TARGET_AVX); + return "movlpd\t{%1, %0|%0, %1}"; + + case MODE_DI: + /* Handle broken assemblers that require movd instead of movq. 
*/ + if (!HAVE_AS_IX86_INTERUNIT_MOVQ + && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) + return "%vmovd\t{%1, %0|%0, %1}"; + return "%vmovq\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } + + default: + gcc_unreachable (); + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "3,4") + (const_string "nox64") + (eq_attr "alternative" "5,6,7,8,17,18") + (const_string "x64") + (eq_attr "alternative" "9,10,11,12") + (const_string "sse2") + ] + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "fmov") + (eq_attr "alternative" "3,4") + (const_string "multi") + (eq_attr "alternative" "5,6,7,8") + (const_string "imov") + (eq_attr "alternative" "9,13") + (const_string "sselog1") + ] + (const_string "ssemov"))) + (set (attr "modrm") + (if_then_else (eq_attr "alternative" "8") + (const_string "0") + (const_string "*"))) + (set (attr "length_immediate") + (if_then_else (eq_attr "alternative" "8") + (const_string "8") + (const_string "*"))) + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "prefix_data16") + (if_then_else + (ior (and (eq_attr "type" "ssemov") (eq_attr "mode" "DI")) + (eq_attr "mode" "V1DF")) + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,7") + (const_string "SI") + (eq_attr "alternative" "5,6,8,17,18") + (const_string "DI") + + /* xorps is one byte shorter for non-AVX targets. 
*/ + (eq_attr "alternative" "9,13") + (cond [(not (match_test "TARGET_SSE2")) + (const_string "V4SF") + (match_test "TARGET_AVX512F") + (const_string "XI") + (match_test "TARGET_AVX") + (const_string "V2DF") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + (match_test "TARGET_SSE_LOAD0_BY_PXOR") + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use movapd to break dependency + chains, otherwise use short move to avoid extra work. */ + + /* movaps is one byte shorter for non-AVX targets. */ + (eq_attr "alternative" "10,14") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "V8DF") + (ior (not (match_test "TARGET_SSE2")) + (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) + (const_string "V4SF") + (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_string "V2DF") + (match_test "TARGET_AVX") + (const_string "DF") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + ] + (const_string "DF")) + + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. 
*/ + (eq_attr "alternative" "11,15") + (cond [(not (match_test "TARGET_SSE2")) + (const_string "V2SF") + (match_test "TARGET_AVX") + (const_string "DF") + (match_test "TARGET_SSE_SPLIT_REGS") + (const_string "V1DF") + ] + (const_string "DF")) + + (and (eq_attr "alternative" "12,16") + (not (match_test "TARGET_SSE2"))) + (const_string "V2SF") + ] + (const_string "DF")))]) + +(define_insn "*movsf_internal" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=Yf*f,m ,Yf*f,?r ,?m,v,v,v,m,?r,?Yi,!*y,!*y,!m,!r ,!*Ym") + (match_operand:SF 1 "general_operand" + "Yf*fm,Yf*f,G ,rmF,rF,C,v,m,v,Yj,r ,*y ,m ,*y,*Yn,r"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && (!can_create_pseudo_p () + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || GET_CODE (operands[1]) != CONST_DOUBLE + || (optimize_function_for_size_p (cfun) + && ((!TARGET_SSE_MATH + && standard_80387_constant_p (operands[1]) > 0) + || (TARGET_SSE_MATH + && standard_sse_constant_p (operands[1])))) + || memory_operand (operands[0], SFmode))" +{ + switch (get_attr_type (insn)) + { + case TYPE_FMOV: + if (which_alternative == 2) + return standard_80387_constant_opcode (operands[1]); + return output_387_reg_move (insn, operands); + + case TYPE_IMOV: + return "mov{l}\t{%1, %0|%0, %1}"; + + case TYPE_SSELOG1: + return standard_sse_constant_opcode (insn, operands[1]); + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_SF: + if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) + return "vmovss\t{%1, %0, %0|%0, %0, %1}"; + return "%vmovss\t{%1, %0|%0, %1}"; + + case MODE_V16SF: + return "vmovaps\t{%g1, %g0|%g0, %g1}"; + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + + case MODE_SI: + return "%vmovd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } + + case TYPE_MMXMOV: + switch (get_attr_mode (insn)) + { + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_SI: + return "movd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } + + default: + 
gcc_unreachable (); + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "fmov") + (eq_attr "alternative" "3,4") + (const_string "imov") + (eq_attr "alternative" "5") + (const_string "sselog1") + (eq_attr "alternative" "11,12,13,14,15") + (const_string "mmxmov") + ] + (const_string "ssemov"))) + (set (attr "prefix") + (if_then_else (eq_attr "type" "sselog1,ssemov") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "prefix_data16") + (if_then_else (and (eq_attr "type" "ssemov") (eq_attr "mode" "SI")) + (const_string "1") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10,12,13,14,15") + (const_string "SI") + /* Alternative 12 is typed mmxmov above, and the mmxmov output code of this pattern accepts only DI and SI. It therefore has to be listed here with the other SI alternatives; left to the SF default it would reach gcc_unreachable. */ + (eq_attr "alternative" "11") + (const_string "DI") + (eq_attr "alternative" "5") + (cond [(not (match_test "TARGET_SSE2")) + (const_string "V4SF") + (match_test "TARGET_AVX512F") + (const_string "V16SF") + (match_test "TARGET_AVX") + (const_string "V4SF") + (match_test "optimize_function_for_size_p (cfun)") + (const_string "V4SF") + (match_test "TARGET_SSE_LOAD0_BY_PXOR") + (const_string "TI") + ] + (const_string "V4SF")) + + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. 
*/ + (eq_attr "alternative" "6") + (cond [(ior (match_operand 0 "ext_sse_reg_operand") + (match_operand 1 "ext_sse_reg_operand")) + (const_string "V16SF") + (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (match_test "TARGET_SSE_SPLIT_REGS")) + (const_string "V4SF") + ] + (const_string "SF")) + ] + (const_string "SF")))]) + +(define_split + [(set (match_operand 0 "any_fp_register_operand") + (match_operand 1 "memory_operand"))] + "reload_completed + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode + || GET_MODE (operands[0]) == SFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + int r = REGNO (operands[0]); + + if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c)) + || (STACK_REGNO_P (r) && standard_80387_constant_p (c) < 1)) + FAIL; +}) + +(define_split + [(set (match_operand 0 "any_fp_register_operand") + (float_extend (match_operand 1 "memory_operand")))] + "reload_completed + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + int r = REGNO (operands[0]); + + if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c)) + || (STACK_REGNO_P (r) && standard_80387_constant_p (c) < 1)) + FAIL; +}) + +;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence +(define_split + [(set (match_operand:X87MODEF 0 "fp_register_operand") + (match_operand:X87MODEF 1 "immediate_operand"))] + "reload_completed + && (standard_80387_constant_p (operands[1]) == 8 + || standard_80387_constant_p (operands[1]) == 9)" + [(set (match_dup 0)(match_dup 1)) + (set (match_dup 0) + (neg:X87MODEF (match_dup 0)))] +{ + REAL_VALUE_TYPE r; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + if (real_isnegzero (&r)) + operands[1] = CONST0_RTX (<MODE>mode); + else + operands[1] = 
CONST1_RTX (<MODE>mode); +}) + +(define_split + [(set (match_operand 0 "nonimmediate_operand") + (match_operand 1 "general_operand"))] + "reload_completed + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode) + && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +(define_insn "*swap<mode>" + [(set (match_operand:MODEF 0 "fp_register_operand" "+f") + (match_operand:MODEF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387 || reload_completed" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "<MODE>")]) + +;; Zero extension instructions + +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "nonimmediate_operand") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]) + +(define_insn "*zero_extendsidi2" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,?r,?o,r ,o,?*Ym,?!*y,?r ,?r,?*Yi,?*x") + (zero_extend:DI + (match_operand:SI 1 "x86_64_zext_operand" + "0 ,rm,r ,rmWz,0,r ,m ,*Yj,*x,r ,m")))] + "" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + if (ix86_use_lea_for_mov (insn, operands)) + return "lea{l}\t{%E1, %k0|%k0, %E1}"; + else + return "mov{l}\t{%1, %k0|%k0, %1}"; + + case TYPE_MULTI: + return "#"; + + case TYPE_MMXMOV: + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_SSELOG1: + return "%vpextrd\t{$0, %1, %k0|%k0, %1, 0}"; + + case TYPE_SSEMOV: + if (GENERAL_REG_P (operands[0])) + return "%vmovd\t{%1, %k0|%k0, %1}"; + + return "%vmovd\t{%1, %0|%0, 
%1}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "nox64") + (eq_attr "alternative" "3,7") + (const_string "x64") + (eq_attr "alternative" "8") + (const_string "x64_sse4") + (eq_attr "alternative" "10") + (const_string "sse2") + ] + (const_string "*"))) + (set (attr "type") + (cond [(eq_attr "alternative" "0,1,2,4") + (const_string "multi") + (eq_attr "alternative" "5,6") + (const_string "mmxmov") + (eq_attr "alternative" "7,9,10") + (const_string "ssemov") + (eq_attr "alternative" "8") + (const_string "sselog1") + ] + (const_string "imovx"))) + (set (attr "prefix_extra") + (if_then_else (eq_attr "alternative" "8") + (const_string "1") + (const_string "*"))) + (set (attr "length_immediate") + (if_then_else (eq_attr "alternative" "8") + (const_string "1") + (const_string "*"))) + (set (attr "prefix") + (if_then_else (eq_attr "type" "ssemov,sselog1") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "prefix_0f") + (if_then_else (eq_attr "type" "imovx") + (const_string "0") + (const_string "*"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "5,6") + (const_string "DI") + (eq_attr "alternative" "7,8,9") + (const_string "TI") + ] + (const_string "SI")))]) + +(define_split + [(set (match_operand:DI 0 "memory_operand") + (zero_extend:DI (match_operand:SI 1 "memory_operand")))] + "reload_completed" + [(set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI (match_operand:SI 1 "register_operand")))] + "!TARGET_64BIT && reload_completed + && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0])) + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand") + 
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))] + "!TARGET_64BIT && reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (const_int 0))] + "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);") + +(define_insn "zero_extend<mode>di2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))] + "TARGET_64BIT" + "movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extend<mode>si2" + [(set (match_operand:SI 0 "register_operand") + (zero_extend:SI (match_operand:SWI12 1 "nonimmediate_operand")))] + "" +{ + if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + { + operands[1] = force_reg (<MODE>mode, operands[1]); + emit_insn (gen_zero_extend<mode>si2_and (operands[0], operands[1])); + DONE; + } +}) + +(define_insn_and_split "zero_extend<mode>si2_and" + [(set (match_operand:SI 0 "register_operand" "=r,?&<r>") + (zero_extend:SI + (match_operand:SWI12 1 "nonimmediate_operand" "0,<r>m"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (true_regnum (operands[0]) != true_regnum (operands[1])) + { + ix86_expand_clear (operands[0]); + + gcc_assert (!TARGET_PARTIAL_REG_STALL); + emit_insn (gen_movstrict<mode> + (gen_lowpart (<MODE>mode, operands[0]), operands[1])); + DONE; + } + + operands[2] = GEN_INT (GET_MODE_MASK (<MODE>mode)); +} + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extend<mode>si2" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI + (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))] + 
"!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" + "movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand")))] + "" +{ + if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + { + operands[1] = force_reg (QImode, operands[1]); + emit_insn (gen_zero_extendqihi2_and (operands[0], operands[1])); + DONE; + } +}) + +(define_insn_and_split "zero_extendqihi2_and" + [(set (match_operand:HI 0 "register_operand" "=r,?&q") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (true_regnum (operands[0]) != true_regnum (operands[1])) + { + ix86_expand_clear (operands[0]); + + gcc_assert (!TARGET_PARTIAL_REG_STALL); + emit_insn (gen_movstrictqi + (gen_lowpart (QImode, operands[0]), operands[1])); + DONE; + } + + operands[0] = gen_lowpart (SImode, operands[0]); +} + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +; zero extend to SImode to avoid partial register stalls +(define_insn "*zero_extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "!(TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))" + "movz{bl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; Sign extension instructions + +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:SI 1 "register_operand")))] + "" +{ + if (!TARGET_64BIT) + { + emit_insn (gen_extendsidi2_1 (operands[0], operands[1])); + DONE; + } +}) + +(define_insn 
"*extendsidi2_rex64" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))] + "TARGET_64BIT" + "@ + {cltq|cdqe} + movs{lq|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI") + (set_attr "prefix_0f" "0") + (set_attr "modrm" "0,1")]) + +(define_insn "extendsidi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o") + (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 2 "=X,X,X,&r"))] + "!TARGET_64BIT" + "#") + +;; Split the memory case. If the source register doesn't die, it will stay +;; this way, if it does die, following peephole2s take care of it. +(define_split + [(set (match_operand:DI 0 "memory_operand") + (sign_extend:DI (match_operand:SI 1 "register_operand"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 2 "register_operand"))] + "reload_completed" + [(const_int 0)] +{ + split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]); + + emit_move_insn (operands[3], operands[1]); + + /* Generate a cltd if possible and doing so it profitable. */ + if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + && true_regnum (operands[1]) == AX_REG + && true_regnum (operands[2]) == DX_REG) + { + emit_insn (gen_ashrsi3_cvt (operands[2], operands[1], GEN_INT (31))); + } + else + { + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_ashrsi3_cvt (operands[2], operands[2], GEN_INT (31))); + } + emit_move_insn (operands[4], operands[2]); + DONE; +}) + +;; Peepholes for the case where the source register does die, after +;; being split with the above splitter. 
+;; Matches: store reg 1 to mem 0; copy reg 1 to reg 2; shift reg 2 right
+;; arithmetically by 31; store reg 2 to mem 3.  When reg 1 and reg 2 are
+;; different registers, peep2_reg_dead_p accepts both at the points tested
+;; in the condition, and mem 3 does not mention reg 2, the copy is dropped
+;; and reg 1 itself is shifted and stored.
+(define_peephole2
+  [(set (match_operand:SI 0 "memory_operand")
+        (match_operand:SI 1 "register_operand"))
+   (set (match_operand:SI 2 "register_operand") (match_dup 1))
+   (parallel [(set (match_dup 2)
+                   (ashiftrt:SI (match_dup 2) (const_int 31)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
+  "REGNO (operands[1]) != REGNO (operands[2])
+   && peep2_reg_dead_p (2, operands[1])
+   && peep2_reg_dead_p (4, operands[2])
+   && !reg_mentioned_p (operands[2], operands[3])"
+  [(set (match_dup 0) (match_dup 1))
+   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 3) (match_dup 1))])
+
+;; Same clean-up for the form without the intermediate copy, where reg 2 is
+;; set directly to reg 1 shifted right by 31, with reg 1 in AX_REG and reg 2
+;; in DX_REG.  It is only applied when not optimizing for size (see the
+;; comment inside the condition); the replacement shifts reg 1 in place and
+;; stores it, so reg 2 is no longer written.
+(define_peephole2
+  [(set (match_operand:SI 0 "memory_operand")
+        (match_operand:SI 1 "register_operand"))
+   (parallel [(set (match_operand:SI 2 "register_operand")
+                   (ashiftrt:SI (match_dup 1) (const_int 31)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SI 3 "memory_operand") (match_dup 2))]
+  "/* cltd is shorter than sarl $31, %eax */
+   !optimize_function_for_size_p (cfun)
+   && true_regnum (operands[1]) == AX_REG
+   && true_regnum (operands[2]) == DX_REG
+   && peep2_reg_dead_p (2, operands[1])
+   && peep2_reg_dead_p (3, operands[2])
+   && !reg_mentioned_p (operands[2], operands[3])"
+  [(set (match_dup 0) (match_dup 1))
+   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 3) (match_dup 1))])
+
+;; Extend to register case.  Optimize case where source and destination
+;; registers match and cases where we can use cltd.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (sign_extend:DI (match_operand:SI 1 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  /* operands[3] and operands[4] become the two SImode halves of the
+     destination: operands[3] receives the source value and operands[4]
+     receives the sign bits.  */
+  split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);
+
+  /* Skip the copy when the source already lives in that half.  */
+  if (true_regnum (operands[3]) != true_regnum (operands[1]))
+    emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.  */
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+      && true_regnum (operands[3]) == AX_REG
+      && true_regnum (operands[4]) == DX_REG)
+    {
+      emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
+      DONE;
+    }
+
+  /* Otherwise copy the source into operands[4] (unless it is already
+     there) and shift it right by 31 in place to leave only the sign.  */
+  if (true_regnum (operands[4]) != true_regnum (operands[1]))
+    emit_move_insn (operands[4], operands[1]);
+
+  emit_insn (gen_ashrsi3_cvt (operands[4], operands[4], GEN_INT (31)));
+  DONE;
+})
+
+;; 64-bit only: sign-extend an SWI12-mode register or memory operand into a
+;; DImode register with a single movs instruction.
+(define_insn "extend<mode>di2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI
+          (match_operand:SWI12 1 "nonimmediate_operand" "<r>m")))]
+  "TARGET_64BIT"
+  "movs{<imodesuffix>q|x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+;; HImode -> SImode sign extension.  The prefix_0f attribute computed below
+;; decides which template is printed: "0" selects cwtl/cwde, anything else
+;; selects movs{wl|x}.
+(define_insn "extendhisi2"
+  [(set (match_operand:SI 0 "register_operand" "=*a,r")
+        (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))]
+  ""
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cwtl|cwde}";
+    default:
+      return "movs{wl|x}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cwtl is vector decoded.
+     ;; Hence prefix_0f is "0" only for alternative 0 when cpu is not k6.
+     (if_then_else (and (eq_attr "cpu" "!k6")
+                        (eq_attr "alternative" "0"))
+        (const_string "0")
+        (const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+        (const_string "0")
+        (const_string "1")))])
+
+;; 64-bit only: the same HImode -> SImode sign extension, with the result
+;; additionally zero-extended to DImode.  The movs template prints the
+;; destination as %k0.
+(define_insn "*extendhisi2_zext"
+  [(set (match_operand:DI 0 "register_operand" "=*a,r")
+        (zero_extend:DI
+          (sign_extend:SI
+            (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))]
+  "TARGET_64BIT"
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cwtl|cwde}";
+    default:
+      return "movs{wl|x}\t{%1, %k0|%k0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cwtl is vector decoded.
+     ;; Hence prefix_0f is "0" only for alternative 0 when cpu is not k6.
+     (if_then_else (and (eq_attr "cpu" "!k6")
+                        (eq_attr "alternative" "0"))
+        (const_string "0")
+        (const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+        (const_string "0")
+        (const_string "1")))])
+
+;; QImode -> SImode sign extension; always a movs instruction.
+(define_insn "extendqisi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  ""
+  "movs{bl|x}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+;; 64-bit only: QImode -> SImode sign extension whose result is additionally
+;; zero-extended to DImode.  The destination is printed as %k0.
+(define_insn "*extendqisi2_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))]
+  "TARGET_64BIT"
+  "movs{bl|x}\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "SI")])
+
+;; QImode -> HImode sign extension.  As in extendhisi2, prefix_0f selects
+;; between the short form (cbtw/cbw here) and movs{bw|x}.
+(define_insn "extendqihi2"
+  [(set (match_operand:HI 0 "register_operand" "=*a,r")
+        (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))]
+  ""
+{
+  switch (get_attr_prefix_0f (insn))
+    {
+    case 0:
+      return "{cbtw|cbw}";
+    default:
+      return "movs{bw|x}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "HI")
+   (set (attr "prefix_0f")
+     ;; movsx is short decodable while cwtl is vector decoded.
+     ;; (The short form emitted by this pattern is cbtw, not cwtl.)
+     (if_then_else (and (eq_attr "cpu" "!k6")
+                        (eq_attr "alternative" "0"))
+        (const_string "0")
+        (const_string "1")))
+   (set (attr "modrm")
+     (if_then_else (eq_attr "prefix_0f" "0")
+        (const_string "0")
+        (const_string "1")))])
+
+;; Conversions between float and double.
+
+;; These are all no-ops in the model used for the 80387.
+;; So just emit moves.
+
+;; %%% Kill these when call knows how to work out a DFmode push earlier.
+;; After reload, a push of an SFmode x87 register extended to DFmode becomes
+;; an explicit stack pointer adjustment by -8 followed by a store through
+;; the stack pointer.
+(define_split
+  [(set (match_operand:DF 0 "push_operand")
+        (float_extend:DF (match_operand:SF 1 "fp_register_operand")))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
+
+;; Same for a push extended to XFmode; the adjustment is
+;; -GET_MODE_SIZE (XFmode), computed into operand 2.
+(define_split
+  [(set (match_operand:XF 0 "push_operand")
+        (float_extend:XF (match_operand:MODEF 1 "fp_register_operand")))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+  "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
+
+(define_expand "extendsfdf2"
+  [(set (match_operand:DF 0 "nonimmediate_operand")
+        (float_extend:DF (match_operand:SF 1 "general_operand")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are TARGET_LEGITIMATE_CONSTANT_P.
+     A CONST_DOUBLE source is handled here: when SSE2 is absent or mixed
+     SSE/x87 math is on, and standard_80387_constant_p returns > 0 for the
+     constant, it is extended at expand time and emitted as a plain move.
+     Any other constant is forced into memory.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+          && standard_80387_constant_p (operands[1]) > 0)
+        {
+          operands[1] = simplify_const_unary_operation
+            (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+          emit_move_insn_1 (operands[0], operands[1]);
+          DONE;
+        }
+      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    }
+})
+
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+   cvtss2sd:
+      unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
+      cvtps2pd xmm2,xmm1
+   We do the conversion post reload to avoid producing 128-bit spills
+   that might lead to an ICE on 32-bit targets.  The sequence is unlikely
+   to combine anyway.  */
+(define_split
+  [(set (match_operand:DF 0 "register_operand")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+   [(set (match_dup 2)
+         (float_extend:V2DF
+           (vec_select:V2SF
+             (match_dup 3)
+             (parallel [(const_int 0) (const_int 1)]))))]
+{
+  /* operands[2] is the destination viewed as V2DF and operands[3] the
+     destination viewed as V4SF; operands[3] may be redirected to the
+     source register below.  */
+  operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+  operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+  /* Use movss for loading from memory, unpcklps reg, reg for registers.
+     Try to avoid move when unpacking can be done in source.  */
+  if (REG_P (operands[1]))
+    {
+      /* If it is unsafe to overwrite upper half of source, we need
+         to move to destination and unpack there.  */
+      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+           || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+          && true_regnum (operands[0]) != true_regnum (operands[1]))
+        {
+          rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+          emit_move_insn (tmp, operands[1]);
+        }
+      else
+        operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
+      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+                                             operands[3]));
+    }
+  else
+    emit_insn (gen_vec_setv4sf_0 (operands[3],
+                                  CONST0_RTX (V4SFmode), operands[1]));
+})
+
+;; It's more profitable to split and then extend in the same register.
+;; The SFmode memory operand is first loaded into the destination register
+;; (viewed in SFmode as operand 2) and then extended register to register.
+(define_peephole2
+  [(set (match_operand:DF 0 "register_operand")
+        (float_extend:DF
+          (match_operand:SF 1 "memory_operand")))]
+  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && SSE_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float_extend:DF (match_dup 2)))]
+  "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+
+;; SSE2 with mixed SSE/x87 math: alternatives 0 and 1 are x87 moves,
+;; alternative 2 is cvtss2sd.
+(define_insn "*extendsfdf2_mixed"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "SF,XF,DF")])
+
+;; SSE math: a single cvtss2sd into an SSE register.
+(define_insn "*extendsfdf2_sse"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "%vcvtss2sd\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DF")])
+
+;; x87: the extension is output as a plain x87 move.
+(define_insn "*extendsfdf2_i387"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF,XF")])
+
+(define_expand "extend<mode>xf2"
+  [(set (match_operand:XF 0 "nonimmediate_operand")
+        (float_extend:XF (match_operand:MODEF 1 "general_operand")))]
+  "TARGET_80387"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are TARGET_LEGITIMATE_CONSTANT_P.
+     A constant for which standard_80387_constant_p returns > 0 is
+     extended at expand time and moved; any other constant is forced
+     into memory.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if (standard_80387_constant_p (operands[1]) > 0)
+        {
+          operands[1] = simplify_const_unary_operation
+            (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
+          emit_move_insn_1 (operands[0], operands[1]);
+          DONE;
+        }
+      operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
+    }
+})
+
+;; x87: extension to XFmode is output as a plain x87 move.
+(define_insn "*extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+        (float_extend:XF
+          (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>,XF")])
+
+;; %%% This seems bad bad news.
+;; This cannot output into an f-reg because there is no way to be sure
+;; of truncating in that case.  Otherwise this is just like a simple move
+;; insn.  So we pretend we can output to a reg in order to get better
+;; register preferencing, but we really use a stack slot.
+
+;; Conversion from DFmode to SFmode.
+(define_expand "truncdfsf2"
+  [(set (match_operand:SF 0 "nonimmediate_operand")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  /* With SSE-only math, or with flag_unsafe_math_optimizations, the plain
+     pattern above is emitted unchanged.  Otherwise an SFmode stack
+     temporary is allocated and the _with_temp form is emitted instead.  */
+  if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+    ;
+  else if (flag_unsafe_math_optimizations)
+    ;
+  else
+    {
+      rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP);
+      emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+      DONE;
+    }
+})
+
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+   cvtsd2ss:
+      unpcklpd xmm2,xmm2 ; packed conversion might crash on signaling NaNs
+      cvtpd2ps xmm2,xmm1
+   We do the conversion post reload to avoid producing 128-bit spills
+   that might lead to an ICE on 32-bit targets.  The sequence is unlikely
+   to combine anyway.  */
+(define_split
+  [(set (match_operand:SF 0 "register_operand")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+   [(set (match_dup 2)
+         (vec_concat:V4SF
+           (float_truncate:V2SF
+             (match_dup 4))
+           (match_dup 3)))]
+{
+  /* operands[2] is the destination viewed as V4SF, operands[3] a zero
+     V2SF constant, and operands[4] the destination viewed as V2DF;
+     operands[4] may be redirected to the source register below.  */
+  operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+  operands[3] = CONST0_RTX (V2SFmode);
+  operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+  /* Use movsd for loading from memory, unpcklpd for registers.
+     Try to avoid move when unpacking can be done in source, or SSE3
+     movddup is available.  */
+  if (REG_P (operands[1]))
+    {
+      if (!TARGET_SSE3
+          && true_regnum (operands[0]) != true_regnum (operands[1])
+          && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+              || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+        {
+          /* Copy the source into the destination first and duplicate
+             there.  */
+          rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+          emit_move_insn (tmp, operands[1]);
+          operands[1] = tmp;
+        }
+      else if (!TARGET_SSE3)
+        operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+    }
+  else
+    emit_insn (gen_sse2_loadlpd (operands[4],
+                                 CONST0_RTX (V2DFmode), operands[1]));
+})
+
+;; It's more profitable to split and then truncate in the same register.
+;; The DFmode memory operand is first loaded into the destination register
+;; (viewed in DFmode as operand 2) and then truncated register to register.
+(define_peephole2
+  [(set (match_operand:SF 0 "register_operand")
+        (float_truncate:SF
+          (match_operand:DF 1 "memory_operand")))]
+  "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && SSE_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
+  "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
+;; Generator used by the truncdfsf2 expander: the truncation in parallel
+;; with a clobber of SFmode operand 2 (the stack temporary passed in).
+(define_expand "truncdfsf2_with_temp"
+  [(parallel [(set (match_operand:SF 0)
+                   (float_truncate:SF (match_operand:DF 1)))
+              (clobber (match_operand:SF 2))])])
+
+;; Mixed SSE/x87 math with flag_unsafe_math_optimizations: alternative 0 is
+;; an x87 move, alternative 1 is cvtsd2ss.  There is no temporary clobber.
+(define_insn "*truncdfsf_fast_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f ,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,ssecvt")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
+;; SSE math: a single cvtsd2ss into an SSE register.
+(define_insn "*truncdfsf_fast_sse"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
+
+;; x87 form without a temporary; only enabled with
+;; flag_unsafe_math_optimizations.
+(define_insn "*truncdfsf_fast_i387"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=fm")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f")))]
+  "TARGET_80387 && flag_unsafe_math_optimizations"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF")])
+
+;; Truncation with an SFmode memory clobber (operand 2) under mixed SSE/x87
+;; math.  Alternative 0 (x87 store to memory) and alternative 1 (cvtsd2ss)
+;; are output directly; the other alternatives output "#" and so have to be
+;; split before final output.
+(define_insn "*truncdfsf_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,x ,?f,?x,?*r")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f ,xm,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))]
+  "TARGET_MIX_SSE_I387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+
+    default:
+      return "#";
+    }
+}
+  [(set_attr "isa" "*,sse2,*,*,*")
+   (set_attr "type" "fmov,ssecvt,multi,multi,multi")
+   (set_attr "unit" "*,*,i387,i387,i387")
+   (set_attr "prefix" "orig,maybe_vex,orig,orig,orig")
+   (set_attr "mode" "SF")])
+
+;; x87-only counterpart of the pattern above: alternative 0 stores directly
+;; to memory, every other alternative outputs "#" and has to be split.
+(define_insn "*truncdfsf_i387"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+
+    default:
+      return "#";
+    }
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "SF")])
+
+;; Clobber-free store of a truncated x87 register to memory, for pure x87
+;; math (neither SSE math nor mixed SSE/x87 math).
+(define_insn "*truncdfsf2_i387_1"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+        (float_truncate:SF
+          (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_80387
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)
+   && !TARGET_MIX_SSE_I387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF")])
+
+;; After reload, a truncation into a register goes through the clobbered
+;; operand 2: the x87 source register (re-created in SFmode) is stored to
+;; operand 2, and the destination is then loaded from operand 2.
+(define_split
+  [(set (match_operand:SF 0 "register_operand")
+        (float_truncate:SF
+          (match_operand:DF 1 "fp_register_operand")))
+   (clobber (match_operand 2))]
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));")
+
+;; Conversion from XFmode to {SF,DF}mode
+
+(define_expand "truncxf<mode>2"
+  [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand")
+                   (float_truncate:MODEF
+                     (match_operand:XF 1 "register_operand")))
+              (clobber (match_dup 2))])]
+  "TARGET_80387"
+{
+  /* With flag_unsafe_math_optimizations, use the _noop pattern, which
+     needs a register destination; a non-register destination gets a fresh
+     pseudo followed by a move.  Otherwise operand 2 becomes a stack
+     temporary for the parallel above.  */
+  if (flag_unsafe_math_optimizations)
+    {
+      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
+      if (reg != operands[0])
+        emit_move_insn (operands[0], reg);
+      DONE;
+    }
+  else
+    operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+})
+
+;; XFmode -> SFmode with a memory clobber.  The output code asserts that
+;; only alternative 0 (store to memory) reaches final output, so the other
+;; alternatives have to be split beforehand.
+(define_insn "*truncxfsf2_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+        (float_truncate:SF
+          (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  gcc_assert (!which_alternative);
+  return output_387_reg_move (insn, operands);
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "SF")])
+
+;; XFmode -> DFmode counterpart of the pattern above; the alternative with
+;; an "x" destination is tagged with isa "sse2".
+(define_insn "*truncxfdf2_mixed"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+        (float_truncate:DF
+          (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  gcc_assert (!which_alternative);
+  return output_387_reg_move (insn, operands);
+}
+  [(set_attr "isa" "*,*,sse2,*")
+   (set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "DF")])
+
+;; Register-to-register form used by the truncxf<mode>2 expander when
+;; flag_unsafe_math_optimizations is set.
+(define_insn "truncxf<mode>2_i387_noop"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387 && flag_unsafe_math_optimizations"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+;; Clobber-free store of a truncated XFmode register to memory.
+(define_insn "*truncxf<mode>2_i387"
+  [(set (match_operand:MODEF 0 "memory_operand" "=m")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+;; Register destination: after reload, truncate into the memory temporary
+;; (operand 2) and then load the destination from it.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand")))
+   (clobber (match_operand:MODEF 2 "memory_operand"))]
+  "TARGET_80387 && reload_completed"
+  [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
+   (set (match_dup 0) (match_dup 2))])
+
+;; Memory destination: the temporary is not needed, so the clobber is
+;; simply dropped.
+(define_split
+  [(set (match_operand:MODEF 0 "memory_operand")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand")))
+   (clobber (match_operand:MODEF 2 "memory_operand"))]
+  "TARGET_80387"
+  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))])
+
+;; Signed conversion to DImode.
+;; XFmode -> DImode.  With TARGET_FISTTP the fisttp pattern is emitted;
+;; otherwise the parallel below (fix plus a flags clobber) is emitted as is.
+(define_expand "fix_truncxfdi2"
+  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
+                   (fix:DI (match_operand:XF 1 "register_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+;; SFmode/DFmode -> DImode.  The choice is made in this order:
+;;  1. the fisttp pattern, if TARGET_FISTTP and the 64-bit SSE-math case
+;;     does not apply;
+;;  2. the SSE pattern, on 64-bit targets when the mode is an SSE float
+;;     mode; it needs a register destination, so a non-register
+;;     destination gets a fresh pseudo followed by a move;
+;;  3. otherwise the parallel below is emitted as is.
+(define_expand "fix_trunc<mode>di2"
+  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand")
+                   (fix:DI (match_operand:MODEF 1 "register_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
+{
+  if (TARGET_FISTTP
+      && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+  if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
+   {
+     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
+     emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
+     if (out != operands[0])
+        emit_move_insn (operands[0], out);
+     DONE;
+   }
+})
+
+;; Signed conversion to SImode.
+
+;; XFmode -> SImode; same structure as fix_truncxfdi2.
+(define_expand "fix_truncxfsi2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
+                   (fix:SI (match_operand:XF 1 "register_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+;; SFmode/DFmode -> SImode; same structure as fix_trunc<mode>di2, except
+;; that the SSE path does not require a 64-bit target.
+(define_expand "fix_trunc<mode>si2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand")
+                   (fix:SI (match_operand:MODEF 1 "register_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+  if (TARGET_FISTTP
+      && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   {
+     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+  if (SSE_FLOAT_MODE_P (<MODE>mode))
+   {
+     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+     emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
+     if (out != operands[0])
+        emit_move_insn (operands[0], out);
+     DONE;
+   }
+})
+
+;; Signed conversion to HImode.
+
+;; x87 only.  The expander is disabled for SSE float modes unless
+;; TARGET_FISTTP is set and SSE math is off.
+(define_expand "fix_trunc<mode>hi2"
+  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand")
+                   (fix:HI (match_operand:X87MODEF 1 "register_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+;; Unsigned conversion to SImode.
+
+;; 32-bit SSE2 math only.  Operand 2 is a register holding a <ssevecmode>
+;; constant vector built from the value 2**31; operands 3 and 4 are vector
+;; scratch registers.
+(define_expand "fixuns_trunc<mode>si2"
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand")
+          (unsigned_fix:SI
+            (match_operand:MODEF 1 "nonimmediate_operand")))
+     (use (match_dup 2))
+     (clobber (match_scratch:<ssevecmode> 3))
+     (clobber (match_scratch:<ssevecmode> 4))])]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+  enum machine_mode mode = <MODE>mode;
+  enum machine_mode vecmode = <ssevecmode>mode;
+  REAL_VALUE_TYPE TWO31r;
+  rtx two31;
+
+  /* This expansion is not used when optimizing for size.  */
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  /* TWO31r = 1.0 scaled by 2**31.  */
+  real_ldexp (&TWO31r, &dconst1, 31);
+  two31 = const_double_from_real_value (TWO31r, mode);
+  two31 = ix86_build_const_vector (vecmode, true, two31);
+  operands[2] = force_reg (vecmode, two31);
+})
+
+;; Matches what the expander above emits; output is always "#", and after
+;; reload the insn is split by ix86_split_convert_uns_si_sse.
+(define_insn_and_split "*fixuns_trunc<mode>_1"
+  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
+        (unsigned_fix:SI
+          (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
+   (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
+   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
+   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && optimize_function_for_speed_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_convert_uns_si_sse (operands);
+  DONE;
+})
+
+;; Unsigned conversion to HImode.
+;; Without these patterns, we'll try the unsigned SI conversion which
+;; is complex for SSE, rather than the signed SI conversion, which isn't.
+
+;; Expands to a signed MODEF -> SImode fix into a fresh SImode pseudo
+;; (operand 2), followed by a store of that pseudo's HImode subreg at
+;; byte 0 into operand 0.
+(define_expand "fixuns_trunc<mode>hi2"
+  [(set (match_dup 2)
+        (fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
+   (set (match_operand:HI 0 "nonimmediate_operand")
+        (subreg:HI (match_dup 2) 0))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "operands[2] = gen_reg_rtx (SImode);")
+
+;; When SSE is available, it is always faster to use it!
+;; Truncating SSE conversion (cvtt...2si) from an SSE register or memory
+;; into a general register.  The prefix_rex attribute is "1" when the
+;; integer mode is DImode.
+(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+        (fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
+   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<SWI48:MODE>mode == DImode")
+          (const_string "1")
+          (const_string "*")))
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "bdver1_decode" "double,double")])
+
+;; Avoid vector decoded forms of the instruction.
+;; When an SSE scratch register is available and we optimize the insn
+;; for speed on a TARGET_AVOID_VECTOR_DECODE target, load the memory
+;; source into the scratch first and convert from the register.
+(define_peephole2
+  [(match_scratch:MODEF 2 "x")
+   (set (match_operand:SWI48 0 "register_operand")
+        (fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
+  "TARGET_AVOID_VECTOR_DECODE
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
+
+;; Placeholder for the fisttp-based truncation; it only matches while
+;; pseudos can still be created and is split unconditionally ("&& 1").
+;; A memory destination becomes fix_trunc<mode>_i387_fisttp; any other
+;; destination gets a SLOT_TEMP stack slot (operand 2) and becomes
+;; fix_trunc<mode>_i387_fisttp_with_temp.
+(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
+  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
+        (fix:SWI248x (match_operand 1 "register_operand")))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+         && (TARGET_64BIT || <MODE>mode != DImode))
+        && TARGET_SSE_MATH)
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
+  else
+    {
+      operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp (operands[0],
+                                                            operands[1],
+                                                            operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+;; fisttp with a memory destination.  The assembly comes from
+;; output_fix_trunc (insn, operands, true); the pattern also clobbers an
+;; XFmode scratch with constraint "=&1f".
+(define_insn "fix_trunc<mode>_i387_fisttp"
+  [(set (match_operand:SWI248x 0 "memory_operand" "=m")
+        (fix:SWI248x (match_operand 1 "register_operand" "f")))
+   (clobber (match_scratch:XF 2 "=&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+         && (TARGET_64BIT || <MODE>mode != DImode))
+        && TARGET_SSE_MATH)"
+  "* return output_fix_trunc (insn, operands, true);"
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+;; fisttp with an extra memory temporary (operand 2).  Alternative 0 is
+;; a memory destination with the temporary unconstrained ("X");
+;; alternative 1 is a register destination that needs the temporary in
+;; memory.  Always output as "#" and resolved by the two splits below.
+(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
+  [(set (match_operand:SWI248x 0 "nonimmediate_operand" "=m,?r")
+        (fix:SWI248x (match_operand 1 "register_operand" "f,f")))
+   (clobber (match_operand:SWI248x 2 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+         && (TARGET_64BIT || <MODE>mode != DImode))
+        && TARGET_SSE_MATH)"
+  "#"
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+;; After reload, register destination: convert into the memory
+;; temporary (operand 2), then copy the temporary into operand 0.
+(define_split
+  [(set (match_operand:SWI248x 0 "register_operand")
+        (fix:SWI248x (match_operand 1 "register_operand")))
+   (clobber (match_operand:SWI248x 2 "memory_operand"))
+   (clobber (match_scratch 3))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (fix:SWI248x (match_dup 1)))
+              (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))])
+
+;; After reload, memory destination: convert straight into operand 0 and
+;; drop the clobber of the unused temporary.
+(define_split
+  [(set (match_operand:SWI248x 0 "memory_operand")
+        (fix:SWI248x (match_operand 1 "register_operand")))
+   (clobber (match_operand:SWI248x 2 "memory_operand"))
+   (clobber (match_scratch 3))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:SWI248x (match_dup 1)))
+              (clobber (match_dup 3))])])
+
+;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
+;; of the machinery.  Please note the clobber of FLAGS_REG.  In the i387
+;; control word calculation (inserted by LCM in the mode switching pass),
+;; FLAGS_REG-clobbering insns can be used.  Look at the
+;; emit_i387_cw_initialization () function in i386.c.
+;; Placeholder for the non-fisttp x87 truncation; it only matches while
+;; pseudos can still be created and is split unconditionally ("&& 1").
+;; The split sets ix86_optimize_mode_switching[I387_TRUNC], allocates
+;; the HImode stack slots SLOT_CW_STORED and SLOT_CW_TRUNC as operands 2
+;; and 3, and emits fix_trunc<mode>_i387 for a memory destination or
+;; fix_trunc<mode>_i387_with_temp (with a SLOT_TEMP slot as operand 4)
+;; otherwise.
+(define_insn_and_split "*fix_trunc<mode>_i387_1"
+  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
+        (fix:SWI248x (match_operand 1 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+        && (TARGET_64BIT || <MODE>mode != DImode))
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+                                         operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
+                                                     operands[2], operands[3],
+                                                     operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+;; DImode x87 truncation to memory.  Operands 2 and 3 are the two HImode
+;; control-word slots passed in by the splitter above; the assembly is
+;; produced by output_fix_trunc (insn, operands, false).  Unlike the
+;; SWI24 variant further down, this pattern clobbers an XFmode scratch.
+(define_insn "fix_truncdi_i387"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+        (fix:DI (match_operand 1 "register_operand" "f")))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+  "* return output_fix_trunc (insn, operands, false);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "DI")])
+
+;; DImode x87 truncation with a DImode memory temporary (operand 4);
+;; alternative 1 allows a register destination.  Output as "#" and
+;; resolved by the two splits below.
+(define_insn "fix_truncdi_i387_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+        (fix:DI (match_operand 1 "register_operand" "f,f")))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "DI")])
+
+;; After reload, register destination: convert into the temporary
+;; (operand 4), then copy it into operand 0.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (fix:DI (match_operand 1 "register_operand")))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:DI 4 "memory_operand"))
+   (clobber (match_scratch 5))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (fix:DI (match_dup 1)))
+              (use (match_dup 2))
+              (use (match_dup 3))
+              (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))])
+
+;; After reload, memory destination: convert directly into operand 0.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand")
+        (fix:DI (match_operand 1 "register_operand")))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:DI 4 "memory_operand"))
+   (clobber (match_scratch 5))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:DI (match_dup 1)))
+              (use (match_dup 2))
+              (use (match_dup 3))
+              (clobber (match_dup 5))])])
+
+;; HImode/SImode (SWI24) x87 truncation to memory; same shape as
+;; fix_truncdi_i387 but without the XFmode scratch clobber.
+(define_insn "fix_trunc<mode>_i387"
+  [(set (match_operand:SWI24 0 "memory_operand" "=m")
+        (fix:SWI24 (match_operand 1 "register_operand" "f")))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+  "* return output_fix_trunc (insn, operands, false);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+;; SWI24 x87 truncation with a memory temporary (operand 4); output as
+;; "#" and resolved by the two splits below.
+(define_insn "fix_trunc<mode>_i387_with_temp"
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m,?r")
+        (fix:SWI24 (match_operand 1 "register_operand" "f,f")))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:SWI24 4 "memory_operand" "=X,m"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+;; After reload, register destination: convert into the temporary
+;; (operand 4), then copy it into operand 0.
+(define_split
+  [(set (match_operand:SWI24 0 "register_operand")
+        (fix:SWI24 (match_operand 1 "register_operand")))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:SWI24 4 "memory_operand"))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (fix:SWI24 (match_dup 1)))
+              (use (match_dup 2))
+              (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))])
+
+;; After reload, memory destination: convert directly into operand 0.
+(define_split
+  [(set (match_operand:SWI24 0 "memory_operand")
+        (fix:SWI24 (match_operand 1 "register_operand")))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:SWI24 4 "memory_operand"))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:SWI24 (match_dup 1)))
+              (use (match_dup 2))
+              (use (match_dup 3))])])
+
+;; fnstcw: store to an HImode memory operand, modelled as an
+;; UNSPEC_FSTCW of FPCR_REG.  The length attribute is the default
+;; address length plus 2.
+(define_insn "x86_fnstcw_1"
+  [(set (match_operand:HI 0 "memory_operand" "=m")
+        (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))]
+  "TARGET_80387"
+  "fnstcw\t%0"
+  [(set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+   (set_attr "mode" "HI")
+   (set_attr "unit" "i387")
+   (set_attr "bdver1_decode" "vector")])
+
+;; fldcw: set FPCR_REG from an HImode memory operand, modelled as an
+;; UNSPEC_FLDCW.  The length attribute is the default address length
+;; plus 2.
+(define_insn "x86_fldcw_1"
+  [(set (reg:HI FPCR_REG)
+        (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))]
+  "TARGET_80387"
+  "fldcw\t%0"
+  [(set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))
+   (set_attr "mode" "HI")
+   (set_attr "unit" "i387")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+;; Conversion between fixed point and floating point.
+
+;; Even though we only accept memory inputs, the backend _really_
+;; wants to be able to do this between registers.  Thankfully, LRA
+;; will fix this up for us during register allocation.
+
+;; fild from an HImode operand (constraint "m") into an x87 register.
+;; Enabled with TARGET_80387 unless the mode uses SSE math, in which
+;; case TARGET_MIX_SSE_I387 is also required.
+(define_insn "floathi<mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+        (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "fild%Z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")
+   (set_attr "fp_int_src" "true")])
+
+;; fild from an SWI48x operand (constraint "m") into an XFmode register.
+(define_insn "float<SWI48x:mode>xf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (float:XF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
+  "TARGET_80387"
+  "fild%Z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "XF")
+   (set_attr "fp_int_src" "true")])
+
+;; Expand SWI48 -> MODEF conversion.  When the mode does not use SSE
+;; math and X87_ENABLE_FLOAT rejects a direct x87 conversion, convert to
+;; a fresh XFmode pseudo first and truncate it with gen_truncxfsf2 or
+;; gen_truncxfdf2.  Otherwise the template is emitted as written.
+(define_expand "float<SWI48:mode><MODEF:mode>2"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)"
+{
+  if (!(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+      && !X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48:MODE>mode))
+    {
+      rtx reg = gen_reg_rtx (XFmode);
+      rtx (*insn)(rtx, rtx);
+
+      emit_insn (gen_float<SWI48:mode>xf2 (reg, operands[1]));
+
+      if (<MODEF:MODE>mode == SFmode)
+        insn = gen_truncxfsf2;
+      else if (<MODEF:MODE>mode == DFmode)
+        insn = gen_truncxfdf2;
+      else
+        gcc_unreachable ();
+
+      emit_insn (insn (operands[0], reg));
+      DONE;
+    }
+})
+
+;; SSE-math integer -> float conversion with three alternatives:
+;;   0: x87 fild from memory; enabled only with TARGET_MIX_SSE_I387 and
+;;      X87_ENABLE_FLOAT for the mode pair,
+;;   1: cvtsi2* from a general register; enabled only with
+;;      TARGET_INTER_UNIT_CONVERSIONS or when optimizing for size,
+;;   2: cvtsi2* from memory; always enabled.
+(define_insn "*float<SWI48:mode><MODEF:mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
+        (float:MODEF
+          (match_operand:SWI48 1 "nonimmediate_operand" "m,r,m")))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+  "@
+   fild%Z1\t%1
+   %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}
+   %vcvtsi2<MODEF:ssemodesuffix><SWI48:rex64suffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "fmov,sseicvt,sseicvt")
+   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set (attr "prefix_rex")
+     (if_then_else
+       (and (eq_attr "prefix" "maybe_vex")
+            (match_test "<SWI48:MODE>mode == DImode"))
+       (const_string "1")
+       (const_string "*")))
+   (set_attr "unit" "i387,*,*")
+   (set_attr "athlon_decode" "*,double,direct")
+   (set_attr "amdfam10_decode" "*,vector,double")
+   (set_attr "bdver1_decode" "*,double,direct")
+   (set_attr "fp_int_src" "true")
+   (set (attr "enabled")
+     (cond [(eq_attr "alternative" "0")
+              (symbol_ref "TARGET_MIX_SSE_I387
+                           && X87_ENABLE_FLOAT (<MODEF:MODE>mode,
+                                                <SWI48:MODE>mode)")
+            (eq_attr "alternative" "1")
+              (symbol_ref "TARGET_INTER_UNIT_CONVERSIONS
+                           || optimize_function_for_size_p (cfun)")
+           ]
+           (symbol_ref "true")))
+   ])
+
+;; Plain x87 fild for SWI48x -> MODEF, enabled when X87_ENABLE_FLOAT
+;; accepts the mode pair.
+(define_insn "*float<SWI48x:mode><MODEF:mode>2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (float:MODEF (match_operand:SWI48x 1 "nonimmediate_operand" "m")))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI48x:MODE>mode)"
+  "fild%Z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "fp_int_src" "true")])
+
+;; Try TARGET_USE_VECTOR_CONVERTS, but not so hard as to require extra memory
+;; slots when !TARGET_INTER_UNIT_MOVES_TO_VEC disables the general_regs
+;; alternative in sse2_loadld.
+;; After reload, for an SSE destination register: view operand 0 as
+;; <ssevecmode> (operand 3) and as V4SImode (operand 4), load the SImode
+;; source into the V4SI view with gen_sse2_loadld (merging with a zero
+;; vector), then convert the vector with gen_floatv4siv4sf2 or
+;; gen_sse2_cvtdq2pd depending on the vector mode.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand")
+        (float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed && SSE_REG_P (operands[0])
+   && (MEM_P (operands[1]) || TARGET_INTER_UNIT_MOVES_TO_VEC)"
+  [(const_int 0)]
+{
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                     <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+  emit_insn (gen_sse2_loadld (operands[4],
+                              CONST0_RTX (V4SImode), operands[1]));
+
+  if (<ssevecmode>mode == V4SFmode)
+    emit_insn (gen_floatv4siv4sf2 (operands[3], operands[4]));
+  else
+    emit_insn (gen_sse2_cvtdq2pd (operands[3], operands[4]));
+  DONE;
+})
+
+;; Avoid partial SSE register dependency stalls.
+;; After reload, for an SSE destination register: move a zero vector
+;; into the full-width view of operand 0, then emit the conversion as a
+;; vec_merge (selector const1_rtx) of the duplicated scalar result with
+;; that zeroed register.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand")
+        (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(const_int 0)]
+{
+  const enum machine_mode vmode = <MODEF:ssevecmode>mode;
+  const enum machine_mode mode = <MODEF:MODE>mode;
+  rtx t, op0 = simplify_gen_subreg (vmode, operands[0], mode, 0);
+
+  emit_move_insn (op0, CONST0_RTX (vmode));
+
+  t = gen_rtx_FLOAT (mode, operands[1]);
+  t = gen_rtx_VEC_DUPLICATE (vmode, t);
+  t = gen_rtx_VEC_MERGE (vmode, t, op0, const1_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, op0, t));
+  DONE;
+})
+
+;; Break partial reg stall for cvtsd2ss.
+
+;; DF -> SF truncation into an SSE register that is not also the source
+;; register: rewrite the operands as V4SF/V2DF views, move a zero vector
+;; into the destination, then emit the vec_merge form below.
+(define_peephole2
+  [(set (match_operand:SF 0 "register_operand")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && SSE_REG_P (operands[0])
+   && (!SSE_REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))"
+  [(set (match_dup 0)
+        (vec_merge:V4SF
+          (vec_duplicate:V4SF
+            (float_truncate:V2SF
+              (match_dup 1)))
+          (match_dup 0)
+          (const_int 1)))]
+{
+  operands[0] = simplify_gen_subreg (V4SFmode, operands[0],
+                                     SFmode, 0);
+  operands[1] = simplify_gen_subreg (V2DFmode, operands[1],
+                                     DFmode, 0);
+  emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
+})
+
+;; Break partial reg stall for cvtss2sd.
+
+;; SF -> DF extension, handled the same way: V2DF/V4SF views, a zero
+;; vector moved into the destination, then the vec_merge form below.
+(define_peephole2
+  [(set (match_operand:DF 0 "register_operand")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_SSE_PARTIAL_REG_DEPENDENCY
+   && optimize_function_for_speed_p (cfun)
+   && SSE_REG_P (operands[0])
+   && (!SSE_REG_P (operands[1])
+       || REGNO (operands[0]) != REGNO (operands[1]))"
+  [(set (match_dup 0)
+        (vec_merge:V2DF
+          (float_extend:V2DF
+            (vec_select:V2SF
+              (match_dup 1)
+              (parallel [(const_int 0) (const_int 1)])))
+          (match_dup 0)
+          (const_int 1)))]
+{
+  operands[0] = simplify_gen_subreg (V2DFmode, operands[0],
+                                     DFmode, 0);
+  operands[1] = simplify_gen_subreg (V4SFmode, operands[1],
+                                     SFmode, 0);
+  emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
+})
+
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing DImode value through XMM registers.
+
+;; 32-bit only: DImode -> x87 float.  Alternative 0 takes the source
+;; from memory and leaves the scratches and the temporary unconstrained
+;; ("X"); alternative 1 takes it from general registers and needs two
+;; V4SI SSE scratches (operands 3, 4) and a DImode memory temporary
+;; (operand 2).  Output as "#" and resolved by the two splits below.
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+        (float:X87MODEF
+          (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+   (clobber (match_scratch:V4SI 3 "=X,x"))
+   (clobber (match_scratch:V4SI 4 "=X,x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
+;; Register source, after reload: load the two SImode halves (subreg
+;; bytes 0 and 4) into the V4SI scratches, interleave them into
+;; operand 3, re-express operand 3 as a DImode register, store it to the
+;; memory temporary (operand 2) and convert from there.
+(define_split
+  [(set (match_operand:X87MODEF 0 "fp_register_operand")
+        (float:X87MODEF (match_operand:DI 1 "register_operand")))
+   (clobber (match_scratch:V4SI 3))
+   (clobber (match_scratch:V4SI 4))
+   (clobber (match_operand:DI 2 "memory_operand"))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+{
+  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+     Assemble the 64-bit DImode value in an xmm register.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+                              gen_rtx_SUBREG (SImode, operands[1], 0)));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+                              gen_rtx_SUBREG (SImode, operands[1], 4)));
+  emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
+                                         operands[4]));
+
+  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
+})
+
+;; Memory source, after reload: the scratches and the temporary are not
+;; needed, so reduce to a plain float from memory.
+(define_split
+  [(set (match_operand:X87MODEF 0 "fp_register_operand")
+        (float:X87MODEF (match_operand:DI 1 "memory_operand")))
+   (clobber (match_scratch:V4SI 3))
+   (clobber (match_scratch:V4SI 4))
+   (clobber (match_operand:DI 2 "memory_operand"))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
+
+;; Unsigned QImode/HImode (SWI12) -> MODEF on 32-bit SSE math: widen the
+;; source to SImode with convert_to_mode (unsignedp = 1), then use the
+;; signed SImode conversion.
+(define_expand "floatuns<SWI12:mode><MODEF:mode>2"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (unsigned_float:MODEF
+          (match_operand:SWI12 1 "nonimmediate_operand")))]
+  "!TARGET_64BIT
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+{
+  operands[1] = convert_to_mode (SImode, operands[1], 1);
+  emit_insn (gen_floatsi<MODEF:mode>2 (operands[0], operands[1]));
+  DONE;
+})
+
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; SImode value to DImode through XMM register instead of pushing two
+;; SImode values to stack.  Note that even !TARGET_INTER_UNIT_MOVES_TO_VEC
+;; targets benefit from this optimization.  Also note that fild
+;; loads from memory only.
+
+;; 32-bit unsigned SImode -> x87 float through a DImode memory temporary
+;; (operand 2).  Alternative 0 takes the source in an SSE register;
+;; alternative 1 takes it from memory and needs an SSE scratch
+;; (operand 3).  Output as "#" and resolved by the two splits below.
+(define_insn "*floatunssi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+        (unsigned_float:X87MODEF
+          (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:SI 3 "=X,x"))]
+  "!TARGET_64BIT
+   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+;; Register source, after reload: store the source, viewed as a DImode
+;; subreg, into the temporary and do a signed DImode float from there.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand")
+        (unsigned_float:X87MODEF
+          (match_operand:SI 1 "register_operand")))
+   (clobber (match_operand:DI 2 "memory_operand"))
+   (clobber (match_scratch:SI 3))]
+  "!TARGET_64BIT
+   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0)
+        (float:X87MODEF (match_dup 2)))]
+  "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+;; Memory source, after reload: first move the source into the scratch
+;; (operand 3), then proceed as above using the scratch's DImode view.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand")
+        (unsigned_float:X87MODEF
+          (match_operand:SI 1 "memory_operand")))
+   (clobber (match_operand:DI 2 "memory_operand"))
+   (clobber (match_scratch:SI 3))]
+  "!TARGET_64BIT
+   && TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+   && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+        (float:X87MODEF (match_dup 2)))]
+{
+  emit_move_insn (operands[3], operands[1]);
+  operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
+})
+
+;; Expand unsigned SImode -> X87MODEF on 32-bit targets.  With SSE math
+;; for the mode the work is done by ix86_expand_convert_uns_si<mode>_sse;
+;; otherwise operand 2 becomes a DImode SLOT_TEMP stack slot and the
+;; template is emitted.
+(define_expand "floatunssi<mode>2"
+  [(parallel
+     [(set (match_operand:X87MODEF 0 "register_operand")
+           (unsigned_float:X87MODEF
+             (match_operand:SI 1 "nonimmediate_operand")))
+      (clobber (match_dup 2))
+      (clobber (match_scratch:SI 3))])]
+  "!TARGET_64BIT
+   && ((TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
+        && TARGET_SSE)
+       || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    {
+      ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+      DONE;
+    }
+  else
+    operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+})
+
+;; Unsigned DImode -> SFmode on 64-bit SSE math; fully delegated to
+;; x86_emit_floatuns.
+(define_expand "floatunsdisf2"
+  [(use (match_operand:SF 0 "register_operand"))
+   (use (match_operand:DI 1 "nonimmediate_operand"))]
+  "TARGET_64BIT && TARGET_SSE_MATH"
+  "x86_emit_floatuns (operands); DONE;")
+
+;; Unsigned DImode -> DFmode: x86_emit_floatuns on 64-bit targets,
+;; ix86_expand_convert_uns_didf_sse otherwise.
+(define_expand "floatunsdidf2"
+  [(use (match_operand:DF 0 "register_operand"))
+   (use (match_operand:DI 1 "nonimmediate_operand"))]
+  "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
+   && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+  if (TARGET_64BIT)
+    x86_emit_floatuns (operands);
+  else
+    ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
+  DONE;
+})
+
+;; Load effective address instructions
+
+;; lea of an address operand into a register.  An SImode_address_operand
+;; source is asserted to occur only with TARGET_64BIT and is output as a
+;; 32-bit lea into %k0.  After reload the insn is split when
+;; ix86_avoid_lea_for_addr says so; the split re-reads the operands from
+;; curr_insn, runs ix86_split_lea_for_addr (in SImode for such
+;; addresses) and, if it worked in a narrower mode, zero-extends the
+;; result to DImode.
+(define_insn_and_split "*lea<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (match_operand:SWI48 1 "address_no_seg_operand" "Ts"))]
+  ""
+{
+  if (SImode_address_operand (operands[1], VOIDmode))
+    {
+      gcc_assert (TARGET_64BIT);
+      return "lea{l}\t{%E1, %k0|%k0, %E1}";
+    }
+  else
+    return "lea{<imodesuffix>}\t{%E1, %0|%0, %E1}";
+}
+  "reload_completed && ix86_avoid_lea_for_addr (insn, operands)"
+  [(const_int 0)]
+{
+  enum machine_mode mode = <MODE>mode;
+  rtx pat;
+
+  /* ix86_avoid_lea_for_addr re-recognizes insn and may
+     change operands[] array behind our back.  */
+  pat = PATTERN (curr_insn);
+
+  operands[0] = SET_DEST (pat);
+  operands[1] = SET_SRC (pat);
+
+  /* Emit all operations in SImode for zero-extended addresses.  */
+  if (SImode_address_operand (operands[1], VOIDmode))
+    mode = SImode;
+
+  ix86_split_lea_for_addr (curr_insn, operands, mode);
+
+  /* Zero-extend return register to DImode for zero-extended addresses.  */
+  if (mode != <MODE>mode)
+    emit_insn (gen_zero_extendsidi2
+               (operands[0], gen_lowpart (mode, operands[0])));
+
+  DONE;
+}
+  [(set_attr "type" "lea")
+   (set (attr "mode")
+     (if_then_else
+       (match_operand 1 "SImode_address_operand")
+       (const_string "SI")
+       (const_string "<MODE>")))])
+
+;; Add instructions
+
+;; Generic add expander; everything is delegated to
+;; ix86_expand_binary_operator.
+(define_expand "add<mode>3"
+  [(set (match_operand:SDWIM 0 "nonimmediate_operand")
+        (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")
+                    (match_operand:SDWIM 2 "<general_operand>")))]
+  ""
+  "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;")
+
+;; Double-word add.  After reload it is split into two DWIH-mode
+;; parallels: an add that also sets FLAGS_REG via UNSPEC_ADD_CARRY, and
+;; an add of operands 4 and 5 plus the (ltu FLAGS_REG 0) carry into
+;; operand 3.  split_double_mode is called on operands 0..2 with
+;; &operands[0] and &operands[3] as its output arrays.
+;; NOTE(review): presumably low halves land in operands 0..2 and high
+;; halves in 3..5 -- confirm against split_double_mode in i386.c.
+(define_insn_and_split "*add<dwi>3_doubleword"
+  [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o")
+        (plus:<DWI>
+          (match_operand:<DWI> 1 "nonimmediate_operand" "%0,0")
+          (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, <DWI>mode, operands)"
+  "#"
+  "reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG)
+                   (unspec:CC [(match_dup 1) (match_dup 2)]
+                              UNSPEC_ADD_CARRY))
+              (set (match_dup 0)
+                   (plus:DWIH (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+                   (plus:DWIH
+                     (match_dup 4)
+                     (plus:DWIH
+                       (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+                       (match_dup 5))))
+              (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);")
+
+;; Add that also sets FLAGS_REG, modelled with UNSPEC_ADD_CARRY.
+(define_insn "*add<mode>3_cc"
+  [(set (reg:CC FLAGS_REG)
+        (unspec:CC
+          [(match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+           (match_operand:SWI48 2 "<general_operand>" "r<i>,rm")]
+          UNSPEC_ADD_CARRY))
+   (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+        (plus:SWI48 (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; QImode counterpart of *add<mode>3_cc.
+(define_insn "addqi3_cc"
+  [(set (reg:CC FLAGS_REG)
+        (unspec:CC
+          [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
+           (match_operand:QI 2 "general_operand" "qn,qm")]
+          UNSPEC_ADD_CARRY))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+        (plus:QI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands)"
+  "add{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "QI")])
+
+;; SImode/DImode add clobbering flags.  The "type" attribute selects the
+;; output: alternative 3 is "lea" and is output as "#"; an
+;; incdec_operand as operand 2 gives inc/dec; everything else is add, or
+;; sub when x86_maybe_negate_const_int negates the constant.
+;; Alternative 2 has the destination tied to operand 2, so the sources
+;; are swapped before output.
+(define_insn "*add<mode>_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,r,r")
+        (plus:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
+          (match_operand:SWI48 2 "x86_64_general_operand" "rme,re,0,le")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{<imodesuffix>}\t%0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{<imodesuffix>}\t%0";
+        }
+
+    default:
+      /* For most processors, ADD is faster than LEA.  This alternative
+         was added to use ADD as much as possible.  */
+      if (which_alternative == 2)
+        {
+          rtx tmp;
+          tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+        }
+
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "3")
+              (const_string "lea")
+            (match_operand:SWI48 2 "incdec_operand")
+              (const_string "incdec")
+           ]
+           (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+        (const_string "1")
+        (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; It may seem that a nonimmediate operand is the proper one for operand 1.
+;; The addsi_1 pattern allows nonimmediate operand at that place and
+;; we take care in ix86_binary_operator_ok to not allow two memory
+;; operands so proper swapping will be done in reload.  This allows
+;; patterns constructed from addsi_1 to match.
+
+;; 64-bit only: SImode add whose result is zero-extended into a DImode
+;; register.  All instructions are output on the 32-bit view (%k0) of
+;; the destination.  Alternative 2 is "lea" (output as "#");
+;; alternative 1 has the destination tied to operand 2, so the sources
+;; are swapped before output.
+(define_insn "addsi_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+        (zero_extend:DI
+          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
+                   (match_operand:SI 2 "x86_64_general_operand" "rme,0,le"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+        return "inc{l}\t%k0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+        }
+
+    default:
+      /* For most processors, ADD is faster than LEA.  This alternative
+         was added to use ADD as much as possible.  */
+      if (which_alternative == 1)
+        {
+          rtx tmp;
+          tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+        }
+
+      if (x86_maybe_negate_const_int (&operands[2], SImode))
+        return "sub{l}\t{%2, %k0|%k0, %2}";
+
+      return "add{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+              (const_string "lea")
+            (match_operand:SI 2 "incdec_operand")
+              (const_string "incdec")
+           ]
+           (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+        (const_string "1")
+        (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; HImode add clobbering flags.  Alternative 3 is "lea" (output as "#",
+;; with "mode" attribute SI); alternative 2 has the destination tied to
+;; operand 2, so the sources are swapped before output.
+(define_insn "*addhi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp")
+        (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp")
+                 (match_operand:HI 2 "general_operand" "rn,rm,0,ln")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return "inc{w}\t%0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{w}\t%0";
+        }
+
+    default:
+      /* For most processors, ADD is faster than LEA.  This alternative
+         was added to use ADD as much as possible.  */
+      if (which_alternative == 2)
+        {
+          rtx tmp;
+          tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+        }
+
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], HImode))
+        return "sub{w}\t{%2, %0|%0, %2}";
+
+      return "add{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "3")
+              (const_string "lea")
+            (match_operand:HI 2 "incdec_operand")
+              (const_string "incdec")
+           ]
+           (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+        (const_string "1")
+        (const_string "*")))
+   (set_attr "mode" "HI,HI,HI,SI")])
+
+;; %%% Potential partial reg stall on alternatives 3 and 4.  What to do?
+;; QImode add clobbering flags.  Alternatives 3 and 4 ("widen") are
+;; output as 32-bit instructions on %k0; alternative 5 is "lea" (output
+;; as "#").  Alternatives 2 and 4 have the destination tied to
+;; operand 2, so the sources are swapped before output.
+(define_insn "*addqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp")
+        (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp")
+                 (match_operand:QI 2 "general_operand" "qn,qm,0,rn,0,ln")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (PLUS, QImode, operands)"
+{
+  bool widen = (which_alternative == 3 || which_alternative == 4);
+
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_INCDEC:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (operands[2] == const1_rtx)
+        return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
+      else
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+        }
+
+    default:
+      /* For most processors, ADD is faster than LEA.  These alternatives
+         were added to use ADD as much as possible.  */
+      if (which_alternative == 2 || which_alternative == 4)
+        {
+          rtx tmp;
+          tmp = operands[1], operands[1] = operands[2], operands[2] = tmp;
+        }
+
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], QImode))
+        {
+          if (widen)
+            return "sub{l}\t{%2, %k0|%k0, %2}";
+          else
+            return "sub{b}\t{%2, %0|%0, %2}";
+        }
+      if (widen)
+        return "add{l}\t{%k2, %k0|%k0, %k2}";
+      else
+        return "add{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "5")
+              (const_string "lea")
+            (match_operand:QI 2 "incdec_operand")
+              (const_string "incdec")
+           ]
+           (const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+        (and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+        (const_string "1")
+        (const_string "*")))
+   (set_attr "mode" "QI,QI,QI,SI,SI,SI")])
+
+;; QImode add into a strict_low_part destination, which is also the
+;; first source (match_dup 0).  Disabled when TARGET_PARTIAL_REG_STALL
+;; is set unless optimizing for size, and when both operands are memory.
+(define_insn "*addqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+        (plus:QI (match_dup 0)
+                 (match_operand:QI 1 "general_operand" "qn,qm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[1] == const1_rtx)
+        return "inc{b}\t%0";
+      else
+        {
+          gcc_assert (operands[1] == constm1_rtx);
+          return "dec{b}\t%0";
+        }
+
+    default:
+      if (x86_maybe_negate_const_int (&operands[1], QImode))
+        return "sub{b}\t{%1, %0|%0, %1}";
+
+      return "add{b}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:QI 1 "incdec_operand")
+        (const_string "incdec")
+        (const_string "alu1")))
+   (set (attr "memory")
+     (if_then_else (match_operand 1 "memory_operand")
+        (const_string "load")
+        (const_string "none")))
+   (set_attr "mode" "QI")])
+
+;; Split non destructive adds if we cannot use lea.
+(define_split + [(set (match_operand:SWI48 0 "register_operand") + (plus:SWI48 (match_operand:SWI48 1 "register_operand") + (match_operand:SWI48 2 "x86_64_nonmemory_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_avoid_lea_for_add (insn, operands)" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) + +;; Convert add to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:SWI 0 "register_operand") + (plus:SWI (match_operand:SWI 1 "register_operand") + (match_operand:SWI 2 "<nonmemory_operand>"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && ix86_lea_for_add_ok (insn, operands)" + [(const_int 0)] +{ + enum machine_mode mode = <MODE>mode; + rtx pat; + + /* Modes narrower than SImode are added in SImode, on the SImode + low parts of all three operands. */ + if (<MODE_SIZE> < GET_MODE_SIZE (SImode)) + { + mode = SImode; + operands[0] = gen_lowpart (mode, operands[0]); + operands[1] = gen_lowpart (mode, operands[1]); + operands[2] = gen_lowpart (mode, operands[2]); + } + + pat = gen_rtx_PLUS (mode, operands[1], operands[2]); + + /* The replacement is a bare SET: unlike the matched insn it carries + no FLAGS_REG clobber. */ + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; Split non destructive adds if we cannot use lea. +;; Operand 3, created in the preparation statement, is the SImode low part +;; of the DImode destination. +(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "x86_64_nonmemory_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && reload_completed && ix86_avoid_lea_for_add (insn, operands)" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) + (zero_extend:DI (plus:SI (match_dup 3) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])] + "operands[3] = gen_lowpart (SImode, operands[0]);") + +;; Convert add to the lea pattern to avoid flags dependency. 
+(define_split + [(set (match_operand:DI 0 "register_operand") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "x86_64_nonmemory_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed && ix86_lea_for_add_ok (insn, operands)" + [(set (match_dup 0) + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]) + +;; Add that stores the sum and also sets the flags from comparing that +;; sum with zero. +(define_insn "*add<mode>_2" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>") + (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>") + (plus:SWI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{<imodesuffix>}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{<imodesuffix>}\t%0"; + } + + default: + /* In alternative 2 the destination is tied to operand 2, so swap + the sources to make operand 1 the tied one. */ + if (which_alternative == 2) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) + return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SWI 2 "incdec_operand") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") + (match_operand:SI 2 "x86_64_general_operand" "rme,0")) + (const_int 0))) + (set (match_operand:DI 0 
"register_operand" "=r,r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + /* Alternative 1 ties the destination to operand 2; swap so that + operand 2 is the other source. */ + if (which_alternative == 1) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + if (x86_maybe_negate_const_int (&operands[2], SImode)) + return "sub{l}\t{%2, %k0|%k0, %2}"; + + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI")]) + +;; Flags-only add: the flags come from comparing (neg operand 2) with +;; operand 1, and operand 0 is only a scratch that the instruction clobbers. +(define_insn "*add<mode>_3" + [(set (reg FLAGS_REG) + (compare + (neg:SWI (match_operand:SWI 2 "<general_operand>" "<g>,0")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>"))) + (clobber (match_scratch:SWI 0 "=<r>,<r>"))] + "ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{<imodesuffix>}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{<imodesuffix>}\t%0"; + } + + default: + if (which_alternative == 1) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) + return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SWI 2 "incdec_operand") + (const_string "incdec") + (const_string 
"alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_3_zext" + [(set (reg FLAGS_REG) + (compare + (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rme,0")) + (match_operand:SI 1 "nonimmediate_operand" "%0,r"))) + (set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) + && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + if (which_alternative == 1) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + if (x86_maybe_negate_const_int (&operands[2], SImode)) + return "sub{l}\t{%2, %k0|%k0, %2}"; + + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI")]) + +; For comparisons against 1, -1 and 128, we may generate better code +; by converting cmp to add, inc or dec as done by peephole2. This pattern +; is matched then. We can't accept general immediate, because for +; case of overflows, the result is messed up. +; Also carry flag is reversed compared to cmp, so this conversion is valid +; only for comparisons not depending on it. 
+ +(define_insn "*adddi_4" + [(set (reg FLAGS_REG) + (compare + (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:DI 2 "x86_64_immediate_operand" "e"))) + (clobber (match_scratch:DI 0 "=rm"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCGCmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + /* The RTL is a compare, so the mapping is the reverse of an add: + comparing with -1 is emitted as inc, comparing with 1 as dec. */ + if (operands[2] == constm1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{q}\t%0"; + } + + default: + /* Likewise reversed: ADD when the helper negated the constant, + SUB otherwise. */ + if (x86_maybe_negate_const_int (&operands[2], DImode)) + return "add{q}\t{%2, %0|%0, %2}"; + + return "sub{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "DI")]) + +; For comparisons against 1, -1 and 128, we may generate better code +; by converting cmp to add, inc or dec as done by peephole2. This pattern +; is matched then. We can't accept general immediate, because for +; case of overflows, the result is messed up. +; Also carry flag is reversed compared to cmp, so this conversion is valid +; only for comparisons not depending on it. 
+ +(define_insn "*add<mode>_4" + [(set (reg FLAGS_REG) + (compare + (match_operand:SWI124 1 "nonimmediate_operand" "0") + (match_operand:SWI124 2 "const_int_operand" "n"))) + (clobber (match_scratch:SWI124 0 "=<r>m"))] + "ix86_match_ccmode (insn, CCGCmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + /* The RTL is a compare, so comparing with -1 is emitted as inc + and comparing with 1 as dec. */ + if (operands[2] == constm1_rtx) + return "inc{<imodesuffix>}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{<imodesuffix>}\t%0"; + } + + default: + if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + + return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:<MODE> 2 "incdec_operand") + (const_string "incdec") + (const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +;; Flags-only add: the flags come from comparing the sum with zero, and +;; operand 0 is only a scratch that the instruction clobbers. +(define_insn "*add<mode>_5" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,<r>") + (match_operand:SWI 2 "<general_operand>" "<g>,0")) + (const_int 0))) + (clobber (match_scratch:SWI 0 "=<r>,<r>"))] + "ix86_match_ccmode (insn, CCGOCmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{<imodesuffix>}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{<imodesuffix>}\t%0"; + } + + default: + /* Alternative 1 ties the scratch to operand 2; swap the sources + so that operand 1 is the tied one. */ + if (which_alternative == 1) + { + rtx tmp; + tmp = operands[1], operands[1] = operands[2], operands[2] = tmp; + } + + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (x86_maybe_negate_const_int (&operands[2], <MODE>mode)) + return "sub{<imodesuffix>}\t{%2, %0|%0, %2}"; + + return "add{<imodesuffix>}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SWI 2 "incdec_operand") + (const_string "incdec") + 
(const_string "alu"))) + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "<MODE>")]) + +;; Add operand 2 to the 8-bit field at bit offset 8 of operand 0 (the +;; zero_extract with size 8, position 8); the templates print it with %h0. +(define_insn "addqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0,0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_x64nomem_operand" "Qn,m"))) + (clobber (reg:CC FLAGS_REG))] + "" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set_attr "isa" "*,nox64") + (set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand") + (const_string "incdec") + (const_string "alu"))) + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +;; As addqi_ext_1, but the addend is itself the 8-bit field at bit offset 8 +;; of operand 2. +(define_insn "*addqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "" + "add{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +;; Add with jump on overflow. 
+(define_expand "addv<mode>4" + [(parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO (plus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand")) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_operand>"))) + (sign_extend:<DWI> + (plus:SWI (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI 0 "register_operand") + (plus:SWI (match_dup 1) (match_dup 2)))]) + (set (pc) (if_then_else + (eq (reg:CCO FLAGS_REG) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))] + "" + "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") + +;; The add emitted by the expander above: it stores the sum and sets the +;; CCO flags by comparing the double-width sum of the sign-extended +;; operands with the sign-extended sum. +(define_insn "*addv<mode>4" + [(set (reg:CCO FLAGS_REG) + (eq:CCO (plus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))) + (sign_extend:<DWI> + (plus:SWI (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m") + (plus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "add{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +;; The lea patterns for modes less than 32 bits need to be matched by +;; several insns converted to real lea by splitters. 
+ +(define_insn_and_split "*lea_general_1" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (match_operand 1 "index_register_operand" "l") + (match_operand 2 "register_operand" "r")) + (match_operand 3 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[2]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + enum machine_mode mode = SImode; + rtx pat; + + /* The insn condition restricts this pattern to QImode/HImode; the + split redoes the whole computation on SImode low parts. */ + operands[0] = gen_lowpart (mode, operands[0]); + operands[1] = gen_lowpart (mode, operands[1]); + operands[2] = gen_lowpart (mode, operands[2]); + operands[3] = gen_lowpart (mode, operands[3]); + + pat = gen_rtx_PLUS (mode, gen_rtx_PLUS (mode, operands[1], operands[2]), + operands[3]); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +;; Like *lea_general_1, for index * {2,4,8} + base-or-immediate. +(define_insn_and_split "*lea_general_2" + [(set (match_operand 0 "register_operand" "=r") + (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "n")) + (match_operand 3 "nonmemory_operand" "ri")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + enum machine_mode mode = SImode; + rtx pat; + + /* operands[2] is the constant scale and is used as is. */ + operands[0] = gen_lowpart (mode, operands[0]); + operands[1] = gen_lowpart (mode, operands[1]); + operands[3] = gen_lowpart (mode, operands[3]); + + pat = gen_rtx_PLUS (mode, gen_rtx_MULT (mode, operands[1], operands[2]), + operands[3]); + + 
emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +;; Like *lea_general_1, for index * {2,4,8} + base + immediate. +(define_insn_and_split "*lea_general_3" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "n")) + (match_operand 3 "register_operand" "r")) + (match_operand 4 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[3])" + "#" + "&& reload_completed" + [(const_int 0)] +{ + enum machine_mode mode = SImode; + rtx pat; + + operands[0] = gen_lowpart (mode, operands[0]); + operands[1] = gen_lowpart (mode, operands[1]); + operands[3] = gen_lowpart (mode, operands[3]); + operands[4] = gen_lowpart (mode, operands[4]); + + pat = gen_rtx_PLUS (mode, + gen_rtx_PLUS (mode, + gen_rtx_MULT (mode, operands[1], + operands[2]), + operands[3]), + operands[4]); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +;; (index << n) combined by ior/xor with a constant smaller than 1 << n, +;; for n in 1..3 (both enforced by the insn condition); the split emits it +;; as index * (1 << n) plus the constant. +(define_insn_and_split "*lea_general_4" + [(set (match_operand 0 "register_operand" "=r") + (any_or (ashift + (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const_int_operand" "n")) + (match_operand 3 "const_int_operand" "n")))] + "(((GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))) + || GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) - 1 < 3 + && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) + < ((unsigned HOST_WIDE_INT) 1 << INTVAL (operands[2])))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + 
enum machine_mode mode = GET_MODE (operands[0]); + rtx pat; + + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)) + { + mode = SImode; + operands[0] = gen_lowpart (mode, operands[0]); + operands[1] = gen_lowpart (mode, operands[1]); + } + + /* Turn the shift count into the equivalent multiplier. */ + operands[2] = GEN_INT (1 << INTVAL (operands[2])); + + pat = plus_constant (mode, gen_rtx_MULT (mode, operands[1], operands[2]), + INTVAL (operands[3])); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set (attr "mode") + (if_then_else (match_operand:DI 0) + (const_string "DI") + (const_string "SI")))]) + +;; Subtract instructions + +(define_expand "sub<mode>3" + [(set (match_operand:SDWIM 0 "nonimmediate_operand") + (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand") + (match_operand:SDWIM 2 "<general_operand>")))] + "" + "ix86_expand_binary_operator (MINUS, <MODE>mode, operands); DONE;") + +;; Double-word subtract, split after reload into a flag-setting subtract +;; followed by a subtract that also takes the (ltu FLAGS_REG) borrow; +;; split_double_mode fills in operands 3..5 for the second half. +(define_insn_and_split "*sub<dwi>3_doubleword" + [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=r,o") + (minus:<DWI> + (match_operand:<DWI> 1 "nonimmediate_operand" "0,0") + (match_operand:<DWI> 2 "<general_operand>" "ro<di>,r<di>"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "#" + "reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) + (minus:DWIH (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:DWIH + (match_dup 4) + (plus:DWIH + (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (<DWI>mode, &operands[0], 3, &operands[0], &operands[3]);") + +(define_insn "*sub<mode>_1" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + 
"sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*subsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "x86_64_general_operand" "rme")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; QImode subtract from a strict_low_part destination; operand 0 is both +;; destination and minuend (match_dup 0). +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +;; Subtract that stores the difference and also sets the flags from +;; comparing that difference with zero. +(define_insn "*sub<mode>_2" + [(set (reg FLAGS_REG) + (compare + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (minus:SWI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*subsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "x86_64_general_operand" "rme")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; Subtract with jump on overflow. 
+(define_expand "subv<mode>4" + [(parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO (minus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand")) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_operand>"))) + (sign_extend:<DWI> + (minus:SWI (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI 0 "register_operand") + (minus:SWI (match_dup 1) (match_dup 2)))]) + (set (pc) (if_then_else + (eq (reg:CCO FLAGS_REG) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))] + "" + "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") + +(define_insn "*subv<mode>4" + [(set (reg:CCO FLAGS_REG) + (eq:CCO (minus:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "0,0")) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))) + (sign_extend:<DWI> + (minus:SWI (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (minus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +;; Subtract that stores the difference and sets the flags from a plain +;; compare of operand 1 with operand 2. +(define_insn "*sub<mode>_3" + [(set (reg FLAGS_REG) + (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (minus:SWI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" + "sub{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +;; The template prints %1 rather than %k0; operand 1 is tied to operand 0 +;; by its "0" constraint. +(define_insn "*subsi_3_zext" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "x86_64_general_operand" "rme"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, 
operands)" + "sub{l}\t{%2, %1|%1, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; Add with carry and subtract with borrow + +(define_expand "<plusminus_insn><mode>3_carry" + [(parallel + [(set (match_operand:SWI 0 "nonimmediate_operand") + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand") + (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") + (const_int 0)]) + (match_operand:SWI 2 "<general_operand>")))) + (clobber (reg:CC FLAGS_REG))])] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)") + +;; adc/sbb. The operand check is keyed on <CODE>, as in the expander above, +;; so that the minus instance is validated as MINUS and not as a +;; commutative PLUS. +(define_insn "*<plusminus_insn><mode>3_carry" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0") + (plus:SWI + (match_operator 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "<plusminus_carry_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*addsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (plus:SI (match_operator 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 2 "x86_64_general_operand" "rme"))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +;; sbb reads the carry flag just like adc, so this pattern sets use_carry +;; like the other carry patterns. +(define_insn "*subsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (plus:SI (match_operator 3 "ix86_carry_flag_operator" + [(reg FLAGS_REG) 
(const_int 0)]) + (match_operand:SI 2 "x86_64_general_operand" "rme"))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +;; ADCX instruction + +(define_insn "adcx<mode>3" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "%0") + (plus:SWI48 + (match_operator 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI48 2 "nonimmediate_operand" "rm"))) + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (plus:SWI48 (match_dup 1) + (plus:SWI48 (match_op_dup 4 + [(match_dup 3) (const_int 0)]) + (match_dup 2))))] + "TARGET_ADX && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "adcx\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "mode" "<MODE>")]) + +;; Overflow setting add instructions + +(define_insn "*add<mode>3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "<general_operand>" "<g>")) + (match_dup 1))) + (clobber (match_scratch:SWI 0 "=<r>"))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "add{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*add<mode>3_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "%0,0") + (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")) + (match_dup 1))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>") + (plus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" + "add{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +(define_insn "*addsi3_zext_cc_overflow" + [(set (reg:CCC 
FLAGS_REG) + (compare:CCC + (plus:SI + (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "x86_64_general_operand" "rme")) + (match_dup 1))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "add{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "<plusminus_insn>xf3" + [(set (match_operand:XF 0 "register_operand") + (plusminus:XF + (match_operand:XF 1 "register_operand") + (match_operand:XF 2 "register_operand")))] + "TARGET_80387") + +(define_expand "<plusminus_insn><mode>3" + [(set (match_operand:MODEF 0 "register_operand") + (plusminus:MODEF + (match_operand:MODEF 1 "register_operand") + (match_operand:MODEF 2 "nonimmediate_operand")))] + "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)") + +;; Multiply instructions + +(define_expand "mul<mode>3" + [(parallel [(set (match_operand:SWIM248 0 "register_operand") + (mult:SWIM248 + (match_operand:SWIM248 1 "register_operand") + (match_operand:SWIM248 2 "<general_operand>"))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_expand "mulqi3" + [(parallel [(set (match_operand:QI 0 "register_operand") + (mult:QI + (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH") + +;; On AMDFAM10 +;; IMUL reg32/64, reg32/64, imm8 Direct +;; IMUL reg32/64, mem32/64, imm8 VectorPath +;; IMUL reg32/64, reg32/64, imm32 Direct +;; IMUL reg32/64, mem32/64, imm32 VectorPath +;; IMUL reg32/64, reg32/64 Direct +;; IMUL reg32/64, mem32/64 Direct +;; +;; On BDVER1, all above IMULs use DirectPath + +(define_insn "*mul<mode>3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r,r,r") + (mult:SWI48 + (match_operand:SWI48 1 
"nonimmediate_operand" "%rm,rm,0") + (match_operand:SWI48 2 "<general_operand>" "K,<i>,mr"))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + imul{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "<MODE>")]) + +(define_insn "*mulsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SI 2 "x86_64_general_operand" "K,e,mr")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +;; On AMDFAM10 +;; IMUL reg16, reg16, imm8 VectorPath +;; IMUL reg16, mem16, imm8 VectorPath +;; 
IMUL reg16, reg16, imm16 VectorPath +;; IMUL reg16, mem16, imm16 VectorPath +;; IMUL reg16, reg16 Direct +;; IMUL reg16, mem16 Direct +;; +;; On BDVER1, all HI MULs use DoublePath + +(define_insn "*mulhi3_1" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:HI 2 "general_operand" "K,n,mr"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1,2") + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(eq_attr "alternative" "0,1") + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "double") + (set_attr "mode" "HI")]) + +;;On AMDFAM10 and BDVER1 +;; MUL reg8 Direct +;; MUL mem8 Direct + +;; Operand 0 is constrained to "a" and operand 1 is tied to it, so only +;; operand 2 appears in the one-operand mul{b} template. +(define_insn "*mulqi3_1" + [(set (match_operand:QI 0 "register_operand" "=a") + (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "QI")]) + +;; Multiply with jump on overflow. 
+(define_expand "mulv<mode>4" + [(parallel [(set (reg:CCO FLAGS_REG) + (eq:CCO (mult:<DWI> + (sign_extend:<DWI> + (match_operand:SWI48 1 "register_operand")) + (sign_extend:<DWI> + (match_operand:SWI48 2 "<general_operand>"))) + (sign_extend:<DWI> + (mult:SWI48 (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI48 0 "register_operand") + (mult:SWI48 (match_dup 1) (match_dup 2)))]) + (set (pc) (if_then_else + (eq (reg:CCO FLAGS_REG) (const_int 0)) + (label_ref (match_operand 3)) + (pc)))]) + +(define_insn "*mulv<mode>4" + [(set (reg:CCO FLAGS_REG) + (eq:CCO (mult:<DWI> + (sign_extend:<DWI> + (match_operand:SWI 1 "nonimmediate_operand" "%rm,rm,0")) + (sign_extend:<DWI> + (match_operand:SWI 2 "<general_operand>" "K,<i>,mr"))) + (sign_extend:<DWI> + (mult:SWI (match_dup 1) (match_dup 2))))) + (set (match_operand:SWI 0 "register_operand" "=r,r,r") + (mult:SWI (match_dup 1) (match_dup 2)))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + imul{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "<MODE>")]) + +(define_expand "<u>mul<mode><dwi>3" + [(parallel [(set (match_operand:<DWI> 0 "register_operand") + (mult:<DWI> + (any_extend:<DWI> + (match_operand:DWIH 1 "nonimmediate_operand")) + (any_extend:<DWI> + (match_operand:DWIH 2 "register_operand")))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_expand "<u>mulqihi3" + [(parallel 
[(set (match_operand:HI 0 "register_operand") + (mult:HI + (any_extend:HI + (match_operand:QI 1 "nonimmediate_operand")) + (any_extend:HI + (match_operand:QI 2 "register_operand")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH") + +;; MULX patterns. Operands 0 and 1 are the register outputs (low and high +;; half of the product); the multiplication sources are operands 2 and 3, +;; so the "at most one memory source" check must test operands 2 and 3. +(define_insn "*bmi2_umulditi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI + (match_operand:DI 2 "nonimmediate_operand" "%d") + (match_operand:DI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 1 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_dup 2)) + (zero_extend:TI (match_dup 3))) + (const_int 64))))] + "TARGET_64BIT && TARGET_BMI2 + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "mulx\t{%3, %0, %1|%1, %0, %3}" + [(set_attr "type" "imulx") + (set_attr "prefix" "vex") + (set_attr "mode" "DI")]) + +;; 32-bit counterpart of the pattern above; same operand numbering. +(define_insn "*bmi2_umulsidi3_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI + (match_operand:SI 2 "nonimmediate_operand" "%d") + (match_operand:SI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 1 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI (match_dup 2)) + (zero_extend:DI (match_dup 3))) + (const_int 32))))] + "!TARGET_64BIT && TARGET_BMI2 + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "mulx\t{%3, %0, %1|%1, %0, %3}" + [(set_attr "type" "imulx") + (set_attr "prefix" "vex") + (set_attr "mode" "SI")]) + +(define_insn "*umul<mode><dwi>3_1" + [(set (match_operand:<DWI> 0 "register_operand" "=r,A") + (mult:<DWI> + (zero_extend:<DWI> + (match_operand:DWIH 1 "nonimmediate_operand" "%d,0")) + (zero_extend:<DWI> + (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm")))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + # + mul{<imodesuffix>}\t%2" + [(set_attr "isa" "bmi2,*") + (set_attr "type" "imulx,imul") + (set_attr "length_immediate" "*,0") + (set (attr "athlon_decode") + (cond [(eq_attr "alternative" "1") + (if_then_else (eq_attr "cpu" 
"athlon") + (const_string "vector") + (const_string "double"))] + (const_string "*"))) + (set_attr "amdfam10_decode" "*,double") + (set_attr "bdver1_decode" "*,direct") + (set_attr "prefix" "vex,orig") + (set_attr "mode" "<MODE>")]) + +;; Convert mul to the mulx pattern to avoid flags dependency. +(define_split + [(set (match_operand:<DWI> 0 "register_operand") + (mult:<DWI> + (zero_extend:<DWI> + (match_operand:DWIH 1 "register_operand")) + (zero_extend:<DWI> + (match_operand:DWIH 2 "nonimmediate_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && reload_completed + && true_regnum (operands[1]) == DX_REG" + [(parallel [(set (match_dup 3) + (mult:DWIH (match_dup 1) (match_dup 2))) + (set (match_dup 4) + (truncate:DWIH + (lshiftrt:<DWI> + (mult:<DWI> (zero_extend:<DWI> (match_dup 1)) + (zero_extend:<DWI> (match_dup 2))) + (match_dup 5))))])] +{ + split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]); + + operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)); +}) + +(define_insn "*mul<mode><dwi>3_1" + [(set (match_operand:<DWI> 0 "register_operand" "=A") + (mult:<DWI> + (sign_extend:<DWI> + (match_operand:DWIH 1 "nonimmediate_operand" "%0")) + (sign_extend:<DWI> + (match_operand:DWIH 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{<imodesuffix>}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "<MODE>")]) + +(define_insn "*<u>mulqihi3_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (mult:HI + (any_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "%0")) + (any_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && 
MEM_P (operands[2]))" + "<sgnprefix>mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "QI")]) + +(define_expand "<s>mul<mode>3_highpart" + [(parallel [(set (match_operand:SWI48 0 "register_operand") + (truncate:SWI48 + (lshiftrt:<DWI> + (mult:<DWI> + (any_extend:<DWI> + (match_operand:SWI48 1 "nonimmediate_operand")) + (any_extend:<DWI> + (match_operand:SWI48 2 "register_operand"))) + (match_dup 4)))) + (clobber (match_scratch:SWI48 3)) + (clobber (reg:CC FLAGS_REG))])] + "" + "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));") + +(define_insn "*<s>muldi3_highpart_1" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI + (any_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "%a")) + (any_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "<sgnprefix>mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "DI")]) + +(define_insn "*<s>mulsi3_highpart_1" + [(set (match_operand:SI 0 "register_operand" "=d") + (truncate:SI + (lshiftrt:DI + (mult:DI + (any_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (any_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "<sgnprefix>mul{l}\t%2" + [(set_attr "type" 
"imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +(define_insn "*<s>mulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (any_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (any_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "<sgnprefix>mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "bdver1_decode" "direct") + (set_attr "mode" "SI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "mulxf3" + [(set (match_operand:XF 0 "register_operand") + (mult:XF (match_operand:XF 1 "register_operand") + (match_operand:XF 2 "register_operand")))] + "TARGET_80387") + +(define_expand "mul<mode>3" + [(set (match_operand:MODEF 0 "register_operand") + (mult:MODEF (match_operand:MODEF 1 "register_operand") + (match_operand:MODEF 2 "nonimmediate_operand")))] + "(TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode)) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)") + +;; Divide instructions + +;; The patterns that match these are at the end of this file. 
+ +(define_expand "divxf3" + [(set (match_operand:XF 0 "register_operand") + (div:XF (match_operand:XF 1 "register_operand") + (match_operand:XF 2 "register_operand")))] + "TARGET_80387") + +(define_expand "divdf3" + [(set (match_operand:DF 0 "register_operand") + (div:DF (match_operand:DF 1 "register_operand") + (match_operand:DF 2 "nonimmediate_operand")))] + "(TARGET_80387 && X87_ENABLE_ARITH (DFmode)) + || (TARGET_SSE2 && TARGET_SSE_MATH)") + +(define_expand "divsf3" + [(set (match_operand:SF 0 "register_operand") + (div:SF (match_operand:SF 1 "register_operand") + (match_operand:SF 2 "nonimmediate_operand")))] + "(TARGET_80387 && X87_ENABLE_ARITH (SFmode)) + || TARGET_SSE_MATH" +{ + if (TARGET_SSE_MATH + && TARGET_RECIP_DIV + && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swdivsf (operands[0], operands[1], + operands[2], SFmode); + DONE; + } +}) + +;; Divmod instructions. + +(define_expand "divmod<mode>4" + [(parallel [(set (match_operand:SWIM248 0 "register_operand") + (div:SWIM248 + (match_operand:SWIM248 1 "register_operand") + (match_operand:SWIM248 2 "nonimmediate_operand"))) + (set (match_operand:SWIM248 3 "register_operand") + (mod:SWIM248 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) + +;; Split with 8bit unsigned divide: +;; if (dividend and divisor are in [0-255]) +;; use 8bit unsigned integer divide +;; else +;; use original integer divide +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (div:SWI48 (match_operand:SWI48 2 "register_operand") + (match_operand:SWI48 3 "nonimmediate_operand"))) + (set (match_operand:SWI48 1 "register_operand") + (mod:SWI48 (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (<MODE>mode, operands, true); DONE;") + +(define_insn_and_split 
"divmod<mode>4_1" + [(set (match_operand:SWI48 0 "register_operand" "=a") + (div:SWI48 (match_operand:SWI48 2 "register_operand" "0") + (match_operand:SWI48 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWI48 1 "register_operand" "=&d") + (mod:SWI48 (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SWI48 (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:SWI48 (match_dup 2) (match_dup 3))) + (set (match_dup 1) + (mod:SWI48 (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); + + if (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in favor of a mov+shift. */ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +(define_insn_and_split "*divmod<mode>4" + [(set (match_operand:SWIM248 0 "register_operand" "=a") + (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") + (match_operand:SWIM248 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWIM248 1 "register_operand" "=&d") + (mod:SWIM248 (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(parallel [(set (match_dup 1) + (ashiftrt:SWIM248 (match_dup 4) (match_dup 5))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:SWIM248 (match_dup 2) (match_dup 3))) + (set (match_dup 1) + (mod:SWIM248 (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); + + if (<MODE>mode != HImode + && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)) + operands[4] = operands[2]; + else + { + /* Avoid use of cltd in 
favor of a mov+shift. */ + emit_move_insn (operands[1], operands[2]); + operands[4] = operands[1]; + } +} + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +(define_insn "*divmod<mode>4_noext" + [(set (match_operand:SWIM248 0 "register_operand" "=a") + (div:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") + (match_operand:SWIM248 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWIM248 1 "register_operand" "=d") + (mod:SWIM248 (match_dup 2) (match_dup 3))) + (use (match_operand:SWIM248 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "" + "idiv{<imodesuffix>}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>")]) + +(define_expand "divmodqi4" + [(parallel [(set (match_operand:QI 0 "register_operand") + (div:QI + (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "nonimmediate_operand"))) + (set (match_operand:QI 3 "register_operand") + (mod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" +{ + rtx div, mod, insn; + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (HImode); + tmp1 = gen_reg_rtx (HImode); + + /* Extend operands[1] to HImode. Generate 8bit divide. Result is + in AX. */ + emit_insn (gen_extendqihi2 (tmp1, operands[1])); + emit_insn (gen_divmodhiqi3 (tmp0, tmp1, operands[2])); + + /* Extract remainder from AH. */ + tmp1 = gen_rtx_SIGN_EXTRACT (QImode, tmp0, GEN_INT (8), GEN_INT (8)); + insn = emit_move_insn (operands[3], tmp1); + + mod = gen_rtx_MOD (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, mod); + + /* Extract quotient from AL. */ + insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0)); + + div = gen_rtx_DIV (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, div); + + DONE; +}) + +;; Divide AX by r/m8, with result stored in +;; AL <- Quotient +;; AH <- Remainder +;; Change div/mod to HImode and extend the second argument to HImode +;; so that mode of div/mod matches with mode of arguments. 
Otherwise +;; combine may fail. +(define_insn "divmodhiqi3" + [(set (match_operand:HI 0 "register_operand" "=a") + (ior:HI + (ashift:HI + (zero_extend:HI + (truncate:QI + (mod:HI (match_operand:HI 1 "register_operand" "0") + (sign_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm"))))) + (const_int 8)) + (zero_extend:HI + (truncate:QI + (div:HI (match_dup 1) (sign_extend:HI (match_dup 2))))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "idiv{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +(define_expand "udivmod<mode>4" + [(parallel [(set (match_operand:SWIM248 0 "register_operand") + (udiv:SWIM248 + (match_operand:SWIM248 1 "register_operand") + (match_operand:SWIM248 2 "nonimmediate_operand"))) + (set (match_operand:SWIM248 3 "register_operand") + (umod:SWIM248 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])]) + +;; Split with 8bit unsigned divide: +;; if (dividend and divisor are in [0-255]) +;; use 8bit unsigned integer divide +;; else +;; use original integer divide +(define_split + [(set (match_operand:SWI48 0 "register_operand") + (udiv:SWI48 (match_operand:SWI48 2 "register_operand") + (match_operand:SWI48 3 "nonimmediate_operand"))) + (set (match_operand:SWI48 1 "register_operand") + (umod:SWI48 (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_8BIT_IDIV + && TARGET_QIMODE_MATH + && can_create_pseudo_p () + && !optimize_insn_for_size_p ()" + [(const_int 0)] + "ix86_split_idivmod (<MODE>mode, operands, false); DONE;") + +(define_insn_and_split "udivmod<mode>4_1" + [(set (match_operand:SWI48 0 "register_operand" "=a") + (udiv:SWI48 (match_operand:SWI48 2 "register_operand" "0") + (match_operand:SWI48 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWI48 1 "register_operand" "=&d") + (umod:SWI48 (match_dup 2) (match_dup 3))) + (unspec [(const_int 0)] UNSPEC_DIV_ALREADY_SPLIT) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 
0)) + (parallel [(set (match_dup 0) + (udiv:SWI48 (match_dup 2) (match_dup 3))) + (set (match_dup 1) + (umod:SWI48 (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +(define_insn_and_split "*udivmod<mode>4" + [(set (match_operand:SWIM248 0 "register_operand" "=a") + (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") + (match_operand:SWIM248 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWIM248 1 "register_operand" "=&d") + (umod:SWIM248 (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "reload_completed" + [(set (match_dup 1) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:SWIM248 (match_dup 2) (match_dup 3))) + (set (match_dup 1) + (umod:SWIM248 (match_dup 2) (match_dup 3))) + (use (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] + "" + [(set_attr "type" "multi") + (set_attr "mode" "<MODE>")]) + +(define_insn "*udivmod<mode>4_noext" + [(set (match_operand:SWIM248 0 "register_operand" "=a") + (udiv:SWIM248 (match_operand:SWIM248 2 "register_operand" "0") + (match_operand:SWIM248 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SWIM248 1 "register_operand" "=d") + (umod:SWIM248 (match_dup 2) (match_dup 3))) + (use (match_operand:SWIM248 4 "register_operand" "1")) + (clobber (reg:CC FLAGS_REG))] + "" + "div{<imodesuffix>}\t%3" + [(set_attr "type" "idiv") + (set_attr "mode" "<MODE>")]) + +(define_expand "udivmodqi4" + [(parallel [(set (match_operand:QI 0 "register_operand") + (udiv:QI + (match_operand:QI 1 "register_operand") + (match_operand:QI 2 "nonimmediate_operand"))) + (set (match_operand:QI 3 "register_operand") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" +{ + rtx div, mod, insn; + rtx tmp0, tmp1; + + tmp0 = gen_reg_rtx (HImode); + tmp1 = gen_reg_rtx (HImode); + + /* Extend operands[1] to HImode. Generate 8bit divide. Result is + in AX. 
*/ + emit_insn (gen_zero_extendqihi2 (tmp1, operands[1])); + emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, operands[2])); + + /* Extract remainder from AH. */ + tmp1 = gen_rtx_ZERO_EXTRACT (SImode, tmp0, GEN_INT (8), GEN_INT (8)); + tmp1 = simplify_gen_subreg (QImode, tmp1, SImode, 0); + insn = emit_move_insn (operands[3], tmp1); + + mod = gen_rtx_UMOD (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, mod); + + /* Extract quotient from AL. */ + insn = emit_move_insn (operands[0], gen_lowpart (QImode, tmp0)); + + div = gen_rtx_UDIV (QImode, operands[1], operands[2]); + set_unique_reg_note (insn, REG_EQUAL, div); + + DONE; +}) + +(define_insn "udivmodhiqi3" + [(set (match_operand:HI 0 "register_operand" "=a") + (ior:HI + (ashift:HI + (zero_extend:HI + (truncate:QI + (mod:HI (match_operand:HI 1 "register_operand" "0") + (zero_extend:HI + (match_operand:QI 2 "nonimmediate_operand" "qm"))))) + (const_int 8)) + (zero_extend:HI + (truncate:QI + (div:HI (match_dup 1) (zero_extend:HI (match_dup 2))))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +;; We cannot use div/idiv for double division, because it causes +;; "division by zero" on the overflow and that's not what we expect +;; from truncate. Because true (non truncating) double division is +;; never generated, we can't create this insn anyway. +; +;(define_insn "" +; [(set (match_operand:SI 0 "register_operand" "=a") +; (truncate:SI +; (udiv:DI (match_operand:DI 1 "register_operand" "A") +; (zero_extend:DI +; (match_operand:SI 2 "nonimmediate_operand" "rm"))))) +; (set (match_operand:SI 3 "register_operand" "=d") +; (truncate:SI +; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2))))) +; (clobber (reg:CC FLAGS_REG))] +; "" +; "div{l}\t{%2, %0|%0, %2}" +; [(set_attr "type" "idiv")]) + +;;- Logical AND instructions + +;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al. 
+;; Note that this excludes ah. + +(define_expand "testsi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "x86_64_nonmemory_operand")) + (const_int 0)))]) + +(define_expand "testqi_ccz_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand") + (match_operand:QI 1 "nonmemory_operand")) + (const_int 0)))]) + +(define_expand "testdi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:DI (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "x86_64_szext_general_operand")) + (const_int 0)))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))") + +(define_insn "*testdi_1" + [(set (reg FLAGS_REG) + (compare + (and:DI + (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm") + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + test{l}\t{%k1, %k0|%k0, %k1} + test{l}\t{%k1, %k0|%k0, %k1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,0,1,1") + (set_attr "mode" "SI,SI,DI,DI,DI")]) + +(define_insn "*testqi_1_maybe_si" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm,r") + (match_operand:QI 1 "general_operand" "n,n,qn,n")) + (const_int 0)))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[1]) + && INTVAL (operands[1]) >= 0 ? 
CCNOmode : CCZmode)" +{ + if (which_alternative == 3) + { + if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0) + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "test{l}\t{%1, %k0|%k0, %1}"; + } + return "test{b}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1,1") + (set_attr "mode" "QI,QI,QI,SI") + (set_attr "pent_pair" "uv,np,uv,np")]) + +(define_insn "*test<mode>_1" + [(set (reg FLAGS_REG) + (compare + (and:SWI124 + (match_operand:SWI124 0 "nonimmediate_operand" "%!*a,<r>,<r>m") + (match_operand:SWI124 1 "<general_operand>" "<i>,<i>,<r><i>")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "test{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "<MODE>") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ext_ccno_0" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand")) + (const_int 0)))]) + +(define_insn "*testqi_ext_0" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI") + (set_attr "length_immediate" "1") + (set_attr "modrm" "1") + (set_attr "pent_pair" "np")]) + +(define_insn "*testqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "nonimmediate_x64nomem_operand" "Q,m"))) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "isa" "*,nox64") + (set_attr "type" "test") + (set_attr 
"mode" "QI")]) + +(define_insn "*testqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8))) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +;; Combine likes to form bit extractions for some tests. Humor it. +(define_insn "*testqi_ext_3" + [(set (reg FLAGS_REG) + (compare (zero_extract:SWI48 + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:SWI48 1 "const_int_operand") + (match_operand:SWI48 2 "const_int_operand")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && ((TARGET_64BIT && GET_MODE (operands[0]) == DImode) + || GET_MODE (operands[0]) == SImode + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode) + /* Ensure that resulting mask is zero or sign extended operand. */ + && INTVAL (operands[2]) >= 0 + && ((INTVAL (operands[1]) > 0 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32) + || (<MODE>mode == DImode + && INTVAL (operands[1]) > 32 + && INTVAL (operands[1]) + INTVAL (operands[2]) == 64))" + "#") + +(define_split + [(set (match_operand 0 "flags_reg_operand") + (match_operator 1 "compare_operator" + [(zero_extract + (match_operand 2 "nonimmediate_operand") + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")) + (const_int 0)]))] + "ix86_match_ccmode (insn, CCNOmode)" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))] +{ + rtx val = operands[2]; + HOST_WIDE_INT len = INTVAL (operands[3]); + HOST_WIDE_INT pos = INTVAL (operands[4]); + HOST_WIDE_INT mask; + enum machine_mode mode, submode; + + mode = GET_MODE (val); + if (MEM_P (val)) + { + /* ??? Combine likes to put non-volatile mem extractions in QImode + no matter the size of the test. So find a mode that works. 
*/ + if (! MEM_VOLATILE_P (val)) + { + mode = smallest_mode_for_size (pos + len, MODE_INT); + val = adjust_address (val, mode, 0); + } + } + else if (GET_CODE (val) == SUBREG + && (submode = GET_MODE (SUBREG_REG (val)), + GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) + && pos + len <= GET_MODE_BITSIZE (submode) + && GET_MODE_CLASS (submode) == MODE_INT) + { + /* Narrow a paradoxical subreg to prevent partial register stalls. */ + mode = submode; + val = SUBREG_REG (val); + } + else if (mode == HImode && pos + len <= 8) + { + /* Small HImode tests can be converted to QImode. */ + mode = QImode; + val = gen_lowpart (QImode, val); + } + + if (len == HOST_BITS_PER_WIDE_INT) + mask = -1; + else + mask = ((HOST_WIDE_INT)1 << len) - 1; + mask <<= pos; + + operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode)); +}) + +;; Convert HImode/SImode test instructions with immediate to QImode ones. +;; i386 does not allow to encode test with 8bit sign extended immediate, so +;; this is relatively important trick. +;; Do the conversion only post-reload to avoid limiting of the register class +;; to QI regs. 
+(define_split + [(set (match_operand 0 "flags_reg_operand") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "register_operand") + (match_operand 3 "const_int_operand")) + (const_int 0)]))] + "reload_completed + && QI_REG_P (operands[2]) + && GET_MODE (operands[2]) != QImode + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~(255 << 8))) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~(127 << 8))))" + [(set (match_dup 0) + (match_op_dup 1 + [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) + (match_dup 3)) + (const_int 0)]))] +{ + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode); +}) + +(define_split + [(set (match_operand 0 "flags_reg_operand") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "nonimmediate_operand") + (match_operand 3 "const_int_operand")) + (const_int 0)]))] + "reload_completed + && GET_MODE (operands[2]) != QImode + && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2])) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~255)) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~127)))" + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)]))] +{ + operands[2] = gen_lowpart (QImode, operands[2]); + operands[3] = gen_lowpart (QImode, operands[3]); +}) + +(define_split + [(set (match_operand:SWI12 0 "mask_reg_operand") + (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand") + (match_operand:SWI12 2 "mask_reg_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512F && reload_completed" + [(set (match_dup 0) + (any_logic:SWI12 (match_dup 1) + (match_dup 2)))]) + +(define_insn "*k<logic><mode>" + [(set (match_operand:SWI12 0 "mask_reg_operand" "=k") + (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "k") + (match_operand:SWI12 2 "mask_reg_operand" "k")))] + "TARGET_AVX512F" + "k<logic>w\t{%2, 
%1, %0|%0, %1, %2}"; + [(set_attr "mode" "<MODE>") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + +;; %%% This used to optimize known byte-wide and operations to memory, +;; and sometimes to QImode registers. If this is considered useful, +;; it should be done with splitters. + +(define_expand "and<mode>3" + [(set (match_operand:SWIM 0 "nonimmediate_operand") + (and:SWIM (match_operand:SWIM 1 "nonimmediate_operand") + (match_operand:SWIM 2 "<general_szext_operand>")))] + "" +{ + enum machine_mode mode = <MODE>mode; + rtx (*insn) (rtx, rtx); + + if (CONST_INT_P (operands[2]) && REG_P (operands[0])) + { + HOST_WIDE_INT ival = INTVAL (operands[2]); + + if (ival == (HOST_WIDE_INT) 0xffffffff) + mode = SImode; + else if (ival == 0xffff) + mode = HImode; + else if (ival == 0xff) + mode = QImode; + } + + if (mode == <MODE>mode) + { + ix86_expand_binary_operator (AND, <MODE>mode, operands); + DONE; + } + + if (<MODE>mode == DImode) + insn = (mode == SImode) + ? gen_zero_extendsidi2 + : (mode == HImode) + ? gen_zero_extendhidi2 + : gen_zero_extendqidi2; + else if (<MODE>mode == SImode) + insn = (mode == HImode) + ? 
gen_zero_extendhisi2 + : gen_zero_extendqisi2; + else if (<MODE>mode == HImode) + insn = gen_zero_extendqihi2; + else + gcc_unreachable (); + + emit_insn (insn (operands[0], gen_lowpart (mode, operands[1]))); + DONE; +}) + +(define_insn "*anddi_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:DI + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "#"; + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "and{l}\t{%k2, %k0|%k0, %k2}"; + else + return "and{q}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,alu,imovx") + (set_attr "length_immediate" "*,*,*,0") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "type" "imovx") + (and (match_test "INTVAL (operands[2]) == 0xff") + (match_operand 1 "ext_QIreg_operand"))) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "SI,DI,DI,SI")]) + +(define_insn "*andsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "#"; + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "and{l}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "type" "imovx") + (and (match_test "INTVAL (operands[2]) == 0xff") + (match_operand 1 "ext_QIreg_operand"))) + (const_string "1") + (const_string "*"))) + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use 
nonimmediate_operand +(define_insn "*andsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "x86_64_general_operand" "rme")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*andhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,Ya,!k") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm,k") + (match_operand:HI 2 "general_operand" "rn,rm,L,k"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "#"; + + case TYPE_MSKLOG: + return "kandw\t{%2, %1, %0|%0, %1, %2}"; + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "and{w}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,0,*") + (set (attr "prefix_rex") + (if_then_else + (and (eq_attr "type" "imovx") + (match_operand 1 "ext_QIreg_operand")) + (const_string "1") + (const_string "*"))) + (set_attr "mode" "HI,HI,SI,HI")]) + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+;; QImode AND.  Alternatives 0 and 1 emit a byte-wide and, alternative 2
+;; emits a 32-bit and on the %k (SImode) names of the operands, and
+;; alternative 3 (type msklog, "k" constraints) emits kandw.
+(define_insn "*andqi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,!k")
+        (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
+                (match_operand:QI 2 "general_operand" "qn,qmn,rn,k")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (AND, QImode, operands)"
+  "@
+   and{b}\t{%2, %0|%0, %2}
+   and{b}\t{%2, %0|%0, %2}
+   and{l}\t{%k2, %k0|%k0, %k2}
+   kandw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "alu,alu,alu,msklog")
+   (set_attr "mode" "QI,QI,SI,HI")])
+
+;; Byte AND that writes only the low part of operand 0 (strict_low_part).
+;; Enabled unless TARGET_PARTIAL_REG_STALL is set while not optimizing for
+;; size; memory-to-memory forms are rejected by the condition.
+(define_insn "*andqi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
+        (and:QI (match_dup 0)
+                (match_operand:QI 1 "general_operand" "qn,qmn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "and{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+;; op0 = ~op1 & op2 for QImode/HImode (SWI12) registers.
+;; Alternative 0 (isa bmi) emits andn on the %k names, alternative 1 is
+;; split after reload (template "#"), alternative 2 (isa avx512f) emits
+;; kandnw.
+(define_insn "kandn<mode>"
+  [(set (match_operand:SWI12 0 "register_operand" "=r,&r,!k")
+        (and:SWI12
+          (not:SWI12
+            (match_operand:SWI12 1 "register_operand" "r,0,k"))
+          (match_operand:SWI12 2 "register_operand" "r,r,k")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512F"
+  "@
+   andn\t{%k2, %k1, %k0|%k0, %k1, %k2}
+   #
+   kandnw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "bmi,*,avx512f")
+   (set_attr "type" "bitmanip,*,msklog")
+   (set_attr "prefix" "*,*,vex")
+   (set_attr "btver2_decode" "direct,*,*")
+   (set_attr "mode" "<MODE>")])
+
+;; Without BMI, split the general-register form of kandn<mode> whose
+;; inverted input is also the destination into a NOT followed by an AND.
+;; The replacement RTL uses the iterator mode (SWI12) so that each
+;; operation has the same mode as its QImode or HImode operands.
+(define_split
+  [(set (match_operand:SWI12 0 "general_reg_operand")
+        (and:SWI12
+          (not:SWI12
+            (match_dup 0))
+          (match_operand:SWI12 1 "general_reg_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512F && !TARGET_BMI && reload_completed"
+  [(set (match_dup 0)
+        (not:SWI12 (match_dup 0)))
+   (parallel [(set (match_dup 0)
+                   (and:SWI12 (match_dup 0)
+                              (match_dup 1)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Turn *anddi_1 into *andsi_1_zext if possible.
+(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (subreg:DI (match_operand:SI 1 "register_operand") 0) + (match_operand:DI 2 "x86_64_zext_immediate_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + [(parallel [(set (match_dup 0) + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])] + "operands[2] = gen_lowpart (SImode, operands[2]);") + +(define_split + [(set (match_operand:SWI248 0 "register_operand") + (and:SWI248 (match_operand:SWI248 1 "nonimmediate_operand") + (match_operand:SWI248 2 "const_int_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + HOST_WIDE_INT ival = INTVAL (operands[2]); + enum machine_mode mode; + rtx (*insn) (rtx, rtx); + + if (ival == (HOST_WIDE_INT) 0xffffffff) + mode = SImode; + else if (ival == 0xffff) + mode = HImode; + else + { + gcc_assert (ival == 0xff); + mode = QImode; + } + + if (<MODE>mode == DImode) + insn = (mode == SImode) + ? gen_zero_extendsidi2 + : (mode == HImode) + ? gen_zero_extendhidi2 + : gen_zero_extendqidi2; + else + { + if (<MODE>mode != SImode) + /* Zero extend to SImode to avoid partial register stalls. */ + operands[0] = gen_lowpart (SImode, operands[0]); + + insn = (mode == HImode) + ? 
gen_zero_extendhisi2 + : gen_zero_extendqisi2; + } + emit_insn (insn (operands[0], gen_lowpart (mode, operands[1]))); + DONE; +}) + +(define_split + [(set (match_operand 0 "register_operand") + (and (match_dup 0) + (const_int -65536))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL) + || optimize_function_for_size_p (cfun)" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (HImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand") + (and (match_dup 0) + (const_int -256))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (QImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand") + (and (match_dup 0) + (const_int -65281))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && reload_completed" + [(parallel [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]);") + +(define_insn "*anddi_2" + [(set (reg FLAGS_REG) + (compare + (and:DI + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm") + (and:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && ix86_match_ccmode + (insn, + /* If we are going to emit andl instead of andq, and the operands[2] + constant might have the SImode sign bit set, make sure the sign + flag isn't tested, because the instruction will set the sign flag + based on bit 31 rather than bit 63. 
If it isn't CONST_INT, + conservatively assume it might have bit 31 set. */ + (satisfies_constraint_Z (operands[2]) + && (!CONST_INT_P (operands[2]) + || val_signbit_known_set_p (SImode, INTVAL (operands[2])))) + ? CCZmode : CCNOmode) + && ix86_binary_operator_ok (AND, DImode, operands)" + "@ + and{l}\t{%k2, %k0|%k0, %k2} + and{q}\t{%2, %0|%0, %2} + and{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI,DI,DI")]) + +(define_insn "*andqi_2_maybe_si" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmn,qn,n")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (AND, QImode, operands) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[2]) + && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)" +{ + if (which_alternative == 2) + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); + return "and{l}\t{%2, %k0|%k0, %2}"; + } + return "and{b}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*and<mode>_2" + [(set (reg FLAGS_REG) + (compare (and:SWI124 + (match_operand:SWI124 1 "nonimmediate_operand" "%0,0") + (match_operand:SWI124 2 "<general_operand>" "<g>,<r><i>")) + (const_int 0))) + (set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m") + (and:SWI124 (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, <MODE>mode, operands)" + "and{<imodesuffix>}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "<MODE>")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_2_zext" + [(set (reg FLAGS_REG) + (compare (and:SI + (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "x86_64_general_operand" "rme")) + (const_int 0))) + (set 
(match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*andqi_2_slp" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "nonimmediate_operand" "qmn,qn")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (and:QI (match_dup 0) (match_dup 1)))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +;; ??? A bug in recog prevents it from recognizing a const_int as an +;; operand to zero_extend in andqi_ext_1. It was checking explicitly +;; for a QImode operand, which of course failed. +(define_insn "andqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +;; Generated by peephole translating test to and. This shows up +;; often in fp comparisons. 
+;; Flag-setting AND of an immediate with the 8-bit field at bit offset 8
+;; of operand 0 (printed with the %h modifier).  The same and:SI
+;; expression appears in the compare against zero and in the store back
+;; to the field; the insn is only valid when ix86_match_ccmode accepts
+;; CCNOmode.
+(define_insn "*andqi_ext_0_cc"
+  [(set (reg FLAGS_REG)
+        (compare
+          (and:SI
+            (zero_extract:SI
+              (match_operand 1 "ext_register_operand" "0")
+              (const_int 8)
+              (const_int 8))
+            (match_operand 2 "const_int_operand" "n"))
+          (const_int 0)))
+   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (and:SI
+          (zero_extract:SI
+            (match_dup 1)
+            (const_int 8)
+            (const_int 8))
+          (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "QI")])
+
+;; AND of the 8-bit field at bit offset 8 with a zero-extended QImode
+;; operand.  Alternative 0 takes a "Q" register; alternative 1 takes
+;; memory and is tagged isa "nox64".
+(define_insn "*andqi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+                         (const_int 8)
+                         (const_int 8))
+        (and:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0,0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extend:SI
+            (match_operand:QI 2 "nonimmediate_x64nomem_operand" "Q,m"))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "and{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "isa" "*,nox64")
+   (set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+;; AND of two 8-bit fields at bit offset 8: both source operands and the
+;; destination are printed with the %h modifier.
+(define_insn "*andqi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (and:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "%0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extract:SI
+            (match_operand 2 "ext_register_operand" "Q")
+            (const_int 8)
+            (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "and{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+;; Convert wide AND instructions with immediate operand to shorter QImode
+;; equivalents when possible.
+;; Don't do the splitting with memory operands, since it introduces risk
+;; of memory mismatch stalls. We may want to do the splitting for optimizing
+;; for size, but that can (should?) be handled by generic code instead.
+;; After reload, when operand 0 satisfies QI_REG_P, is not QImode, and
+;; every bit of the mask outside bits 8..15 is set (so only bits 8..15
+;; can be cleared), rewrite the AND as an operation on the 8-bit field at
+;; bit offset 8.
+(define_split
+  [(set (match_operand 0 "register_operand")
+        (and (match_operand 1 "register_operand")
+             (match_operand 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(~INTVAL (operands[2]) & ~(255 << 8))
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+                   (and:SI (zero_extract:SI (match_dup 1)
+                                            (const_int 8) (const_int 8))
+                           (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  /* View both registers in SImode for the zero_extract.  */
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  /* Keep only bits 8..15 of the mask, shifted down to bits 0..7.  */
+  operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);
+})
+
+;; Since AND can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is not set.
+;; The condition requires every mask bit above bit 7 to be set and bit 7
+;; itself to be clear; the AND is then rewritten as a QImode operation on
+;; the strict_low_part of operand 0.
+(define_split
+  [(set (match_operand 0 "register_operand")
+        (and (match_operand 1 "general_operand")
+             (match_operand 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && ANY_QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(~INTVAL (operands[2]) & ~255)
+    && !(INTVAL (operands[2]) & 128)
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (strict_low_part (match_dup 0))
+                   (and:QI (match_dup 1)
+                           (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  /* Narrow all three operands to QImode.  */
+  operands[0] = gen_lowpart (QImode, operands[0]);
+  operands[1] = gen_lowpart (QImode, operands[1]);
+  operands[2] = gen_lowpart (QImode, operands[2]);
+})
+
+;; Logical inclusive and exclusive OR instructions
+
+;; %%% This used to optimize known byte-wide and operations to memory.
+;; If this is considered useful, it should be done with splitters.
+
+;; Named expander for the any_or codes over the SWIM modes.  All of the
+;; work is delegated to ix86_expand_binary_operator; the RTL template is
+;; not emitted because the preparation statement ends with DONE.
+(define_expand "<code><mode>3"
+  [(set (match_operand:SWIM 0 "nonimmediate_operand")
+        (any_or:SWIM (match_operand:SWIM 1 "nonimmediate_operand")
+                     (match_operand:SWIM 2 "<general_operand>")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; any_or on SWI48 modes with operand 1 tied to the destination ("0").
+;; Clobbers the flags register.
+(define_insn "*<code><mode>_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm")
+        (any_or:SWI48
+         (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")
+         (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; HImode any_or.  Alternatives 0 and 1 emit the 16-bit integer
+;; instruction; alternative 2 (type msklog, "k" constraints) emits the
+;; three-operand k<logic>w form.
+(define_insn "*<code>hi_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,rm,!k")
+        (any_or:HI
+         (match_operand:HI 1 "nonimmediate_operand" "%0,0,k")
+         (match_operand:HI 2 "general_operand" "<g>,r<i>,k")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, HImode, operands)"
+  "@
+   <logic>{w}\t{%2, %0|%0, %2}
+   <logic>{w}\t{%2, %0|%0, %2}
+   k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "alu,alu,msklog")
+   (set_attr "mode" "HI")])
+
+;; %%% Potential partial reg stall on alternative 2. What to do?
+;; QImode any_or.  Alternatives 0 and 1 emit the byte instruction,
+;; alternative 2 emits the 32-bit instruction on the %k names of the
+;; operands, and alternative 3 (type msklog) emits k<logic>w.
+(define_insn "*<code>qi_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r,!k")
+        (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
+                   (match_operand:QI 2 "general_operand" "qmn,qn,rn,k")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, QImode, operands)"
+  "@
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{l}\t{%k2, %k0|%k0, %k2}
+   k<logic>w\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "alu,alu,alu,msklog")
+   (set_attr "mode" "QI,QI,SI,HI")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+(define_insn "*<code>si_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+         (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+                    (match_operand:SI 2 "x86_64_general_operand" "rme"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; DImode any_or of a zero-extended SImode register with a "Z" immediate,
+;; emitted as the 32-bit instruction.
+(define_insn "*<code>si_1_zext_imm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (any_or:DI
+         (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
+         (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; Byte any_or that writes only the low part of operand 0.
+(define_insn "*<code>qi_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
+        (any_or:QI (match_dup 0)
+                   (match_operand:QI 1 "general_operand" "qmn,qn")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "<logic>{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+;; Flag-setting any_or: the result is both compared against zero and
+;; stored to operand 0.
+(define_insn "*<code><mode>_2"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SWI
+                  (match_operand:SWI 1 "nonimmediate_operand" "%0,0")
+                  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>"))
+                 (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m")
+        (any_or:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; op0 = ~(op1 ^ op2) for QImode/HImode (SWI12) registers.  Alternative 0
+;; (general registers) is split after reload (template "#"); alternative
+;; 1 emits kxnorw.
+(define_insn "kxnor<mode>"
+  [(set (match_operand:SWI12 0 "register_operand" "=r,!k")
+        (not:SWI12
+          (xor:SWI12
+            (match_operand:SWI12 1 "register_operand" "0,k")
+            (match_operand:SWI12 2 "register_operand" "r,k"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_AVX512F"
+  "@
+   #
+   kxnorw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "*,msklog")
+   (set_attr "prefix" "*,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Split the general-register form of kxnor<mode> into an XOR followed by
+;; a NOT.  The replacement RTL uses the iterator mode (SWI12) so that
+;; each operation has the same mode as its QImode or HImode operands.
+(define_split
+  [(set (match_operand:SWI12 0 "general_reg_operand")
+        (not:SWI12
+          (xor:SWI12
+            (match_dup 0)
+            (match_operand:SWI12 1 "general_reg_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+   "TARGET_AVX512F && reload_completed"
+   [(parallel [(set (match_dup 0)
+                    (xor:SWI12 (match_dup 0)
+                               (match_dup 1)))
+               (clobber (reg:CC FLAGS_REG))])
+    (set (match_dup 0)
+         (not:SWI12 (match_dup 0)))])
+
+;; kortestw with the result modelled in CCZmode: compares the IOR of the
+;; two "k" operands against zero.
+(define_insn "kortestzhi"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ
+          (ior:HI
+            (match_operand:HI 0 "register_operand" "k")
+            (match_operand:HI 1 "register_operand" "k"))
+          (const_int 0)))]
+  "TARGET_AVX512F && ix86_match_ccmode (insn, CCZmode)"
+  "kortestw\t{%1, %0|%0, %1}"
+  [(set_attr "mode" "HI")
+   (set_attr "type" "msklog")
+   (set_attr "prefix" "vex")])
+
+;; kortestw with the result modelled in CCCmode: compares the IOR of the
+;; two "k" operands against -1 (all ones).
+(define_insn "kortestchi"
+  [(set (reg:CCC FLAGS_REG)
+        (compare:CCC
+          (ior:HI
+            (match_operand:HI 0 "register_operand" "k")
+            (match_operand:HI 1 "register_operand" "k"))
+          (const_int -1)))]
+  "TARGET_AVX512F && ix86_match_ccmode (insn, CCCmode)"
+  "kortestw\t{%1, %0|%0, %1}"
+  [(set_attr "mode" "HI")
+   (set_attr "type" "msklog")
+   (set_attr "prefix" "vex")])
+
+;; op0 = (op1 << 8) | zero_extend (op2), emitted as kunpckbw.
+(define_insn "kunpckhi"
+  [(set (match_operand:HI 0 "register_operand" "=k")
+        (ior:HI
+          (ashift:HI
+            (match_operand:HI 1 "register_operand" "k")
+            (const_int 8))
+          (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
+  "TARGET_AVX512F"
+  "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mode" "HI")
+   (set_attr "type" "msklog")
+   (set_attr "prefix" "vex")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+;; ??? Special case for immediate operand is missing - it is tricky.
+(define_insn "*<code>si_2_zext"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+                            (match_operand:SI 2 "x86_64_general_operand" "rme"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<code>si_2_zext_imm"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SI
+                  (match_operand:SI 1 "nonimmediate_operand" "%0")
+                  (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
+
+;; Flag-setting byte any_or that writes only the low part of operand 0.
+(define_insn "*<code>qi_2_slp"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
+                            (match_operand:QI 1 "general_operand" "qmn,qn"))
+                 (const_int 0)))
+   (set (strict_low_part (match_dup 0))
+        (any_or:QI (match_dup 0) (match_dup 1)))]
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "<logic>{b}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "QI")])
+
+;; Flag-setting any_or whose arithmetic result is not kept: operand 0 is
+;; only a clobbered scratch register.
+(define_insn "*<code><mode>_3"
+  [(set (reg FLAGS_REG)
+        (compare (any_or:SWI
+                  (match_operand:SWI 1 "nonimmediate_operand" "%0")
+                  (match_operand:SWI 2 "<general_operand>" "<g>"))
+                 (const_int 0)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; any_or of an immediate with the 8-bit field at bit offset 8 of
+;; operand 0 (printed with the %h modifier).
+(define_insn "*<code>qi_ext_0"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0")
+            (const_int 8)
+            (const_int 8))
+          (match_operand 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "1")
+   (set_attr "modrm" "1")
+   (set_attr "mode" "QI")])
+
+;; As above, with a zero-extended QImode register or memory operand; the
+;; memory alternative is tagged isa "nox64".
+(define_insn "*<code>qi_ext_1"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI
+            (match_operand 1 "ext_register_operand" "0,0")
+            (const_int 8)
+            (const_int 8))
+          (zero_extend:SI
+            (match_operand:QI 2 "nonimmediate_x64nomem_operand" "Q,m"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
+  [(set_attr "isa" "*,nox64")
+   (set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+;; any_or of two 8-bit fields at bit offset 8.
+(define_insn "*<code>qi_ext_2"
+  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+                         (const_int 8)
+                         (const_int 8))
+        (any_or:SI
+          (zero_extract:SI (match_operand 1 "ext_register_operand" "0")
+                           (const_int 8)
+                           (const_int 8))
+          (zero_extract:SI (match_operand 2 "ext_register_operand" "Q")
+                           (const_int 8)
+                           (const_int 8))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
+  [(set_attr "type" "alu")
+   (set_attr "length_immediate" "0")
+   (set_attr "mode" "QI")])
+
+;; After reload, when operand 0 satisfies QI_REG_P, is not QImode, and
+;; the constant has no bits set outside bits 8..15, rewrite the operation
+;; to act on the 8-bit field at bit offset 8.
+(define_split
+  [(set (match_operand 0 "register_operand")
+        (any_or (match_operand 1 "register_operand")
+                (match_operand 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+   "reload_completed
+    && QI_REG_P (operands[0])
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+    && !(INTVAL (operands[2]) & ~(255 << 8))
+    && GET_MODE (operands[0]) != QImode"
+  [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
+                   (any_or:SI (zero_extract:SI (match_dup 1)
+                                               (const_int 8) (const_int 8))
+                              (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  /* View both registers in SImode for the zero_extract.  */
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  /* Keep only bits 8..15 of the constant, shifted down to bits 0..7.  */
+  operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);
+})
+
+;; Since OR can be encoded with sign extended immediate, this is only
+;; profitable when 7th bit is set.
+(define_split + [(set (match_operand 0 "register_operand") + (any_or (match_operand 1 "general_operand") + (match_operand 2 "const_int_operand"))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (any_or:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_expand "xorqi_cc_ext_1" + [(parallel [ + (set (reg:CCNO FLAGS_REG) + (compare:CCNO + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "const_int_operand")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))])]) + +(define_insn "*xorqi_cc_ext_1" + [(set (reg FLAGS_REG) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0,0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_x64nomem_operand" "Qn,m")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "isa" "*,nox64") + (set_attr "type" "alu") + (set_attr "modrm" "1") + (set_attr "mode" "QI")]) + +;; Negation instructions + +(define_expand "neg<mode>2" + [(set (match_operand:SDWIM 0 "nonimmediate_operand") + (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))] + "" + 
"ix86_expand_unary_operator (NEG, <MODE>mode, operands); DONE;") + +(define_insn_and_split "*neg<dwi>2_doubleword" + [(set (match_operand:<DWI> 0 "nonimmediate_operand" "=ro") + (neg:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, <DWI>mode, operands)" + "#" + "reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DWIH (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:DWIH (match_dup 1)))]) + (parallel + [(set (match_dup 2) + (plus:DWIH (match_dup 3) + (plus:DWIH (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0)) + (const_int 0)))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 2) + (neg:DWIH (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[2]);") + +(define_insn "*neg<mode>2_1" + [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, <MODE>mode, operands)" + "neg{<imodesuffix>}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "<MODE>")]) + +;; Combine is quite creative about this pattern. +(define_insn "*negsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI + (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; The problem with neg is that it does not perform (compare x 0), +;; it really performs (compare 0 x), which leaves us with the zero +;; flag being the only useful item. 
+
+;; Negation that also sets the flags in CCZmode: the negated value is
+;; compared against zero and stored to operand 0.
+(define_insn "*neg<mode>2_cmpz"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ
+          (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+                   (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+        (neg:SWI (match_dup 1)))]
+  "ix86_unary_operator_ok (NEG, <MODE>mode, operands)"
+  "neg{<imodesuffix>}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "<MODE>")])
+
+;; CCZmode flag-setting form of the shift/negate/shift DImode expression
+;; (shift left by 32, negate, logical shift right by 32), emitted as a
+;; 32-bit neg on %k0.
+(define_insn "*negsi2_cmpz_zext"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ
+          (lshiftrt:DI
+            (neg:DI (ashift:DI
+                      (match_operand:DI 1 "register_operand" "0")
+                      (const_int 32)))
+            (const_int 32))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1)
+                                        (const_int 32)))
+                     (const_int 32)))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; Negate with jump on overflow.
+;; Operand 0 is the result, operand 1 the input and operand 2 the label.
+;; Operand 3 is created by the preparation code below.
+(define_expand "negv<mode>3"
+  [(parallel [(set (reg:CCO FLAGS_REG)
+                   (ne:CCO (match_operand:SWI 1 "register_operand")
+                           (match_dup 3)))
+              (set (match_operand:SWI 0 "register_operand")
+                   (neg:SWI (match_dup 1)))])
+   (set (pc) (if_then_else
+               (eq (reg:CCO FLAGS_REG) (const_int 0))
+               (label_ref (match_operand 2))
+               (pc)))]
+  ""
+{
+  /* Constant with only the most significant bit of <MODE>mode set.  */
+  operands[3]
+    = gen_int_mode (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1),
+                    <MODE>mode);
+})
+
+;; Insn for the parallel produced by negv<mode>3.  The condition requires
+;; operand 2 to satisfy mode_signbit_p for <MODE>mode.
+(define_insn "*negv<mode>3"
+  [(set (reg:CCO FLAGS_REG)
+        (ne:CCO (match_operand:SWI 1 "nonimmediate_operand" "0")
+                (match_operand:SWI 2 "const_int_operand")))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+        (neg:SWI (match_dup 1)))]
+  "ix86_unary_operator_ok (NEG, <MODE>mode, operands)
+   && mode_signbit_p (<MODE>mode, operands[2])"
+  "neg{<imodesuffix>}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "<MODE>")])
+
+;; Changing of sign for FP values is doable using integer unit too.
+
+;; Named abs/neg expander for the X87MODEF modes.  All of the work is
+;; delegated to ix86_expand_fp_absneg_operator.
+(define_expand "<code><mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand")
+        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; The *absneg insns below all output "#", i.e. they are always split.
+;; Each carries a (use ...) of operand 2 and a clobber of the flags
+;; register.  This variant applies under TARGET_MIX_SSE_I387 and offers
+;; "x", "f" and "r" register alternatives.
+(define_insn "*absneg<mode>2_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+        (match_operator:MODEF 3 "absneg_operator"
+          [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
+   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)"
+  "#")
+
+;; Variant for TARGET_SSE_MATH with "x" and "r" register alternatives.
+(define_insn "*absneg<mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r")
+        (match_operator:MODEF 3 "absneg_operator"
+          [(match_operand:MODEF 1 "register_operand" "0 ,x,0")]))
+   (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "#")
+
+;; Variant for TARGET_80387 when the mode is not handled by SSE math,
+;; with "f" and "r" register alternatives.
+(define_insn "*absneg<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+        (match_operator:X87MODEF 3 "absneg_operator"
+          [(match_operand:X87MODEF 1 "register_operand" "0,0")]))
+   (use (match_operand 2))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "#")
+
+;; Named abs/neg expander for TFmode, available with TARGET_SSE.
+(define_expand "<code>tf2"
+  [(set (match_operand:TF 0 "register_operand")
+        (absneg:TF (match_operand:TF 1 "register_operand")))]
+  "TARGET_SSE"
+  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
+
+;; TFmode counterpart of the insns above; operand 2 is a TFmode operand.
+(define_insn "*absnegtf2_sse"
+  [(set (match_operand:TF 0 "register_operand" "=x,x")
+        (match_operator:TF 3 "absneg_operator"
+          [(match_operand:TF 1 "register_operand" "0,x")]))
+   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSE"
+  "#")
+
+;; Splitters for fp abs and neg.
+
+;; Operand 0 is an fp_register_operand: keep the abs/neg operation itself
+;; and drop the (use ...) and the flags clobber.
+(define_split
+  [(set (match_operand 0 "fp_register_operand")
+        (match_operator 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand 2))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))])
+
+;; Operand 0 is an SSE register: replace the operation with an AND (for
+;; ABS) or an XOR (otherwise) of operand 1 with operand 2, carried out in
+;; the mode of operand 2.
+(define_split
+  [(set (match_operand 0 "register_operand")
+        (match_operator 3 "absneg_operator"
+          [(match_operand 1 "register_operand")]))
+   (use (match_operand 2 "nonimmediate_operand"))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 0) (match_dup 3))]
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+  enum machine_mode vmode = GET_MODE (operands[2]);
+  rtx tmp;
+
+  /* Re-express the scalar operands in the mode of operand 2.  */
+  operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0);
+  operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0);
+  /* If the destination matches operand 2, swap the two sources so that
+     the destination becomes the first source.  */
+  if (operands_match_p (operands[0], operands[2]))
+    {
+      tmp = operands[1];
+      operands[1] = operands[2];
+      operands[2] = tmp;
+    }
+  if (GET_CODE (operands[3]) == ABS)
+    tmp = gen_rtx_AND (vmode, operands[1], operands[2]);
+  else
+    tmp = gen_rtx_XOR (vmode, operands[1], operands[2]);
+  operands[3] = tmp;
+})
+
+;; SFmode register: operate on the SImode view of the register.  ABS
+;; ANDs with 0x7fffffff (clears bit 31); otherwise XOR with 0x80000000
+;; (flips bit 31).
+(define_split
+  [(set (match_operand:SF 0 "register_operand")
+        (match_operator:SF 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand:V4SF 2))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  rtx tmp;
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  if (GET_CODE (operands[1]) == ABS)
+    {
+      tmp = gen_int_mode (0x7fffffff, SImode);
+      tmp = gen_rtx_AND (SImode, operands[0], tmp);
+    }
+  else
+    {
+      tmp = gen_int_mode (0x80000000, SImode);
+      tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+    }
+  operands[1] = tmp;
+})
+
+;; DFmode register.  With TARGET_64BIT the destination becomes the 1-bit
+;; field at bit 63 of the DImode view, set to zero for ABS or to its own
+;; complement otherwise.  Without TARGET_64BIT the SImode high part is
+;; masked with 0x7fffffff or XORed with 0x80000000.
+(define_split
+  [(set (match_operand:DF 0 "register_operand")
+        (match_operator:DF 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand 2))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  rtx tmp;
+  if (TARGET_64BIT)
+    {
+      tmp = gen_lowpart (DImode, operands[0]);
+      tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63));
+      operands[0] = tmp;
+
+      if (GET_CODE (operands[1]) == ABS)
+        tmp = const0_rtx;
+      else
+        tmp = gen_rtx_NOT (DImode, tmp);
+    }
+  else
+    {
+      operands[0] = gen_highpart (SImode, operands[0]);
+      if (GET_CODE (operands[1]) == ABS)
+        {
+          tmp = gen_int_mode (0x7fffffff, SImode);
+          tmp = gen_rtx_AND (SImode, operands[0], tmp);
+        }
+      else
+        {
+          tmp = gen_int_mode (0x80000000, SImode);
+          tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+        }
+    }
+  operands[1] = tmp;
+})
+
+;; XFmode register: operate on the SImode register numbered
+;; true_regnum (operands[0]) + 1 (TARGET_64BIT) or + 2 (otherwise).
+;; ABS ANDs it with 0x7fff; otherwise it is XORed with 0x8000.
+;; NOTE(review): presumably that register holds the sign/exponent word of
+;; the XFmode value -- confirm against the register layout.
+(define_split
+  [(set (match_operand:XF 0 "register_operand")
+        (match_operator:XF 1 "absneg_operator" [(match_dup 0)]))
+   (use (match_operand 2))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  rtx tmp;
+  operands[0] = gen_rtx_REG (SImode,
+                             true_regnum (operands[0])
+                             + (TARGET_64BIT ? 1 : 2));
+  if (GET_CODE (operands[1]) == ABS)
+    {
+      tmp = GEN_INT (0x7fff);
+      tmp = gen_rtx_AND (SImode, operands[0], tmp);
+    }
+  else
+    {
+      tmp = GEN_INT (0x8000);
+      tmp = gen_rtx_XOR (SImode, operands[0], tmp);
+    }
+  operands[1] = tmp;
+})
+
+;; Conditionalize these after reload. If they match before reload, we
+;; lose the clobber and ability to use integer instructions.
+
+;; In-place abs/neg on an "f"-class register (operand 1 is tied to
+;; operand 0).  Before reload the pattern is rejected when the mode is an
+;; SSE float mode and TARGET_SSE_MATH is on.
+(define_insn "*<code><mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+        (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
+  "TARGET_80387
+   && (reload_completed
+       || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+  "f<absneg_mnemonic>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "<MODE>")])
+
+;; abs/neg of an SFmode value extended to DFmode; emitted as the same
+;; single f<absneg_mnemonic> instruction.
+(define_insn "*<code>extendsfdf2"
+  [(set (match_operand:DF 0 "register_operand" "=f")
+        (absneg:DF (float_extend:DF
+                     (match_operand:SF 1 "register_operand" "0"))))]
+  "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+  "f<absneg_mnemonic>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "DF")])
+
+;; Same, for an SFmode value extended to XFmode.
+(define_insn "*<code>extendsfxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (absneg:XF (float_extend:XF
+                     (match_operand:SF 1 "register_operand" "0"))))]
+  "TARGET_80387"
+  "f<absneg_mnemonic>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "XF")])
+
+;; Same, for a DFmode value extended to XFmode.
+(define_insn "*<code>extenddfxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (absneg:XF (float_extend:XF
+                     (match_operand:DF 1 "register_operand" "0"))))]
+  "TARGET_80387"
+  "f<absneg_mnemonic>"
+  [(set_attr "type" "fsgn")
+   (set_attr "mode" "XF")])
+
+;; Copysign instructions
+
+;; CSGNMODE lists the scalar modes the copysign patterns are generated
+;; for; CSGNVMODE gives, per scalar mode, the mode of the <CSGNVMODE>
+;; operands used by those patterns.
+(define_mode_iterator CSGNMODE [SF DF TF])
+(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")])
+
+;; Named expander; all work is delegated to ix86_expand_copysign.
+(define_expand "copysign<mode>3"
+  [(match_operand:CSGNMODE 0 "register_operand")
+   (match_operand:CSGNMODE 1 "nonmemory_operand")
+   (match_operand:CSGNMODE 2 "register_operand")]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_SSE && (<MODE>mode == TFmode))"
+  "ix86_expand_copysign (operands); DONE;")
+
+;; Emitted as "#" and split after reload by ix86_split_copysign_const.
+;; Operand 2 is tied to the destination.
+(define_insn_and_split "copysign<mode>3_const"
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
+        (unspec:CSGNMODE
+          [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
+           (match_operand:CSGNMODE 2 "register_operand" "0")
+           (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
+          UNSPEC_COPYSIGN))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_SSE && (<MODE>mode == TFmode))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  "ix86_split_copysign_const (operands); DONE;")
+
+;; Form with a <CSGNVMODE> scratch (operand 1) and five register-tying
+;; alternatives.  Always emitted as "#"; the define_split that follows
+;; lowers it after reload.
+(define_insn "copysign<mode>3_var"
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
+        (unspec:CSGNMODE
+          [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x")
+           (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x")
+           (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
+           (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
+          UNSPEC_COPYSIGN))
+   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_SSE && (<MODE>mode == TFmode))"
+  "#")
+
+;; Post-reload lowering of copysign<mode>3_var through
+;; ix86_split_copysign_var.
+(define_split
+  [(set (match_operand:CSGNMODE 0 "register_operand")
+        (unspec:CSGNMODE
+          [(match_operand:CSGNMODE 2 "register_operand")
+           (match_operand:CSGNMODE 3 "register_operand")
+           (match_operand:<CSGNVMODE> 4)
+           (match_operand:<CSGNVMODE> 5)]
+          UNSPEC_COPYSIGN))
+   (clobber (match_scratch:<CSGNVMODE> 1))]
+  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    || (TARGET_SSE && (<MODE>mode == TFmode)))
+   && reload_completed"
+  [(const_int 0)]
+  "ix86_split_copysign_var (operands); DONE;")
+
+;; One complement instructions
+
+;; Named expander; delegates to ix86_expand_unary_operator.
+(define_expand "one_cmpl<mode>2"
+  [(set (match_operand:SWIM 0 "nonimmediate_operand")
+        (not:SWIM (match_operand:SWIM 1 "nonimmediate_operand")))]
+  ""
+  "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
+
+;; In-place NOT on a register or memory operand for the SWI48 modes.
+;; The pattern carries no flags clobber.
+(define_insn "*one_cmpl<mode>2_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+        (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))]
+  "ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+  "not{<imodesuffix>}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "<MODE>")])
+
+;; HImode NOT.  Alternative 1 works on "k" registers with knotw and is
+;; enabled only for the avx512f isa; the "!" makes it disparaged.
+(define_insn "*one_cmplhi2_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,!k")
+        (not:HI (match_operand:HI 1 "nonimmediate_operand" "0,k")))]
+  "ix86_unary_operator_ok (NOT, HImode, operands)"
+  "@
+   not{w}\t%0
+   knotw\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,avx512f")
+   (set_attr "type" "negnot,msklog")
+   (set_attr "prefix" "*,vex")
+   (set_attr "mode" "HI")])
+
+;; QImode NOT.  Alternative 1 performs the operation as an SImode not on
+;; %k0; alternative 2 uses knotw on "k" registers (avx512f isa only).
+;; %%% Potential partial reg stall on alternative 1. What to do?
+(define_insn "*one_cmplqi2_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k")
+        (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
+  "ix86_unary_operator_ok (NOT, QImode, operands)"
+  "@
+   not{b}\t%0
+   not{l}\t%k0
+   knotw\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "*,*,avx512f")
+   (set_attr "type" "negnot,negnot,msklog")
+   (set_attr "prefix" "*,*,vex")
+   (set_attr "mode" "QI,SI,QI")])
+
+;; SImode NOT whose result is described as zero-extended to DImode;
+;; emitted as a 32-bit not on %k0.
+;; ??? Currently never generated - xor is used instead.
+(define_insn "*one_cmplsi2_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (not:SI (match_operand:SI 1 "register_operand" "0"))))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
+  "not{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; NOT combined with a comparison of the result against zero.  Always
+;; emitted as "#"; the define_split that follows rewrites it as an XOR
+;; with -1 (presumably because the NOT instruction itself does not set
+;; the flags -- confirm against the ISA reference).
+(define_insn "*one_cmpl<mode>2_2"
+  [(set (reg FLAGS_REG)
+        (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))
+                 (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+        (not:SWI (match_dup 1)))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "<MODE>")])
+
+;; Replace the flag-setting NOT by the equivalent XOR with -1 in both the
+;; compare and the value set.
+(define_split
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 2 "compare_operator"
+          [(not:SWI (match_operand:SWI 3 "nonimmediate_operand"))
+           (const_int 0)]))
+   (set (match_operand:SWI 1 "nonimmediate_operand")
+        (not:SWI (match_dup 3)))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+                   (match_op_dup 2 [(xor:SWI (match_dup 3) (const_int -1))
+                                    (const_int 0)]))
+              (set (match_dup 1)
+                   (xor:SWI (match_dup 3) (const_int -1)))])])
+
+;; ??? Currently never generated - xor is used instead.
+;; Flag-setting SImode NOT whose result is zero-extended to DImode.
+;; Always emitted as "#"; the define_split that follows rewrites it as an
+;; XOR with -1.
+(define_insn "*one_cmplsi2_2_zext"
+  [(set (reg FLAGS_REG)
+        (compare (not:SI (match_operand:SI 1 "register_operand" "0"))
+                 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (not:SI (match_dup 1))))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
+   && ix86_unary_operator_ok (NOT, SImode, operands)"
+  "#"
+  [(set_attr "type" "alu1")
+   (set_attr "mode" "SI")])
+
+;; Replace the flag-setting, zero-extending NOT by XOR with -1.
+(define_split
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 2 "compare_operator"
+          [(not:SI (match_operand:SI 3 "register_operand"))
+           (const_int 0)]))
+   (set (match_operand:DI 1 "register_operand")
+        (zero_extend:DI (not:SI (match_dup 3))))]
+  "ix86_match_ccmode (insn, CCNOmode)"
+  [(parallel [(set (match_dup 0)
+                   (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1))
+                                    (const_int 0)]))
+              (set (match_dup 1)
+                   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])])
+
+;; Shift instructions
+
+;; DImode shifts are implemented using the i386 "shift double" opcode,
+;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count
+;; is variable, then the count is in %cl and the "imm" operand is dropped
+;; from the assembler input.
+;;
+;; This instruction shifts the target reg/mem as usual, but instead of
+;; shifting in zeros, bits are shifted in from reg operand. If the insn
+;; is a left shift double, bits are taken from the high order bits of
+;; reg, else if the insn is a shift right double, bits are taken from the
+;; low order bits of reg. So if %eax is "1234" and %edx is "5678",
+;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345".
+;;
+;; Since sh[lr]d does not change the `reg' operand, that is done
+;; separately, making all shifts emit pairs of shift double and normal
+;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to
+;; support a 63 bit shift, each shift where the count is in a reg expands
+;; to a pair of shifts, a branch, a shift by 32 and a label.
+;;
+;; If the shift count is a constant, we need never emit more than one
+;; shift pair, instead using moves and sign extension for counts greater
+;; than 31.
+
+;; Named left-shift expander for all SDWIM modes; delegates to
+;; ix86_expand_binary_operator.
+(define_expand "ashl<mode>3"
+  [(set (match_operand:SDWIM 0 "<shift_operand>")
+        (ashift:SDWIM (match_operand:SDWIM 1 "<ashl_input_operand>")
+                      (match_operand:QI 2 "nonmemory_operand")))]
+  ""
+  "ix86_expand_binary_operator (ASHIFT, <MODE>mode, operands); DONE;")
+
+;; Double-word left shift.  Always emitted as "#" and lowered by the
+;; define_split that follows.  Alternative 0 takes a constant source
+;; ("n", restricted by reg_or_pm1_operand) into an early-clobbered
+;; destination; alternative 1 shifts the destination in place.
+(define_insn "*ashl<mode>3_doubleword"
+  [(set (match_operand:DWI 0 "register_operand" "=&r,r")
+        (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "n,0")
+                    (match_operand:QI 2 "nonmemory_operand" "<S>c,<S>c")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  [(set_attr "type" "multi")])
+
+;; Lower the double-word shift with ix86_split_ashl and no scratch
+;; register.  When optimizing with peephole2 enabled the split waits for
+;; epilogue_completed instead of reload_completed.
+(define_split
+  [(set (match_operand:DWI 0 "register_operand")
+        (ashift:DWI (match_operand:DWI 1 "nonmemory_operand")
+                    (match_operand:QI 2 "nonmemory_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, NULL_RTX, <MODE>mode); DONE;")
+
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+
+;; When a half-width (DWIH) scratch register is available, lower the
+;; double-word left shift with ix86_split_ashl using that scratch.  Only
+;; with TARGET_CMOVE.
+(define_peephole2
+  [(match_scratch:DWIH 3 "r")
+   (parallel [(set (match_operand:<DWI> 0 "register_operand")
+                   (ashift:<DWI>
+                     (match_operand:<DWI> 1 "nonmemory_operand")
+                     (match_operand:QI 2 "nonmemory_operand")))
+              (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, operands[3], <DWI>mode); DONE;")
+
+;; 64-bit shld: operand 0 (read-write, "+") is shifted left by operand 2
+;; and ORed with operand 1 shifted right logically by 64 - operand 2.
+(define_insn "x86_64_shld"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+                  (match_operand:QI 2 "nonmemory_operand" "Jc"))
+                (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
+                  (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "shld{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+;; 32-bit counterpart of x86_64_shld (complement count is 32 - operand 2).
+(define_insn "x86_shld"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+                  (match_operand:QI 2 "nonmemory_operand" "Ic"))
+                (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
+                  (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+;; Branch-free adjustment step: test the count (operand 2) against the
+;; bit equal to the mode's bit size (operand 4, filled in below).  If it
+;; is set, operand 0 receives operand 1 and then operand 1 receives
+;; operand 3, both through conditional moves on the same flags.
+(define_expand "x86_shift<mode>_adj_1"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
+                             (match_dup 4))
+                     (const_int 0)))
+   (set (match_operand:SWI48 0 "register_operand")
+        (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                            (match_operand:SWI48 1 "register_operand")
+                            (match_dup 0)))
+   (set (match_dup 1)
+        (if_then_else:SWI48 (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                            (match_operand:SWI48 3 "register_operand")
+                            (match_dup 1)))]
+  "TARGET_CMOVE"
+  "operands[4] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));")
+
+;; Branching variant of the adjustment (no TARGET_CMOVE requirement):
+;; test the same count bit, jump over the fix-up when it is clear,
+;; otherwise copy operand 1 into operand 0 and clear operand 1.
+(define_expand "x86_shift<mode>_adj_2"
+  [(use (match_operand:SWI48 0 "register_operand"))
+   (use (match_operand:SWI48 1 "register_operand"))
+   (use (match_operand:QI 2 "register_operand"))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2],
+                               GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
+
+  /* Jump to LABEL when the tested bit is zero.  */
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+                              gen_rtx_LABEL_REF (VOIDmode, label),
+                              pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  ix86_expand_clear (operands[1]);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+;; Matches a shift whose count is (subreg:QI (and:SI reg const)).  The
+;; insn condition accepts it only when the constant keeps every bit of
+;; bitsize-1, and the output then shifts by %b2 directly, dropping the
+;; AND.
+;; Avoid useless masking of count operand.
+(define_insn "*ashl<mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+        (ashift:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "0")
+          (subreg:QI
+            (and:SI
+              (match_operand:SI 2 "register_operand" "c")
+              (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+  return "sal{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
+;; BMI2 shlx: three-operand shift with the count in an SWI48 register.
+;; The pattern has no flags clobber.
+(define_insn "*bmi2_ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                      (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "shlx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
+;; General SWI48 left shift.  Alternative 0 is the in-place sal (or add
+;; when the "type" attribute evaluates to alu); alternative 1 is typed
+;; lea and alternative 2 is typed ishiftx (bmi2 isa only).  Both of the
+;; latter are output as "#" and so must be split.
+(define_insn "*ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
+                      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_ISHIFTX:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{<imodesuffix>}\t%0";
+      else
+        return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,bmi2")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+              (const_string "lea")
+            (eq_attr "alternative" "2")
+              (const_string "ishiftx")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                      (match_operand 0 "register_operand"))
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; After reload with TARGET_BMI2, a register-count left shift into a
+;; register loses its flags clobber.  The QImode count is re-expressed in
+;; <MODE>mode with gen_lowpart so the result has the operand modes of the
+;; flag-free shift form.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+        (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+                      (match_operand:QI 2 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+;; shlx on SImode operands with the result described as zero-extended to
+;; DImode; the destination is printed as %k0.
+(define_insn "*bmi2_ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                     (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "shlx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+;; SImode left shift zero-extended to DImode.  Alternative 0 is the
+;; in-place sal/add on %k0; alternative 1 is typed lea and alternative 2
+;; ishiftx (bmi2 isa only), both output as "#".
+(define_insn "*ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+        (zero_extend:DI
+          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
+                     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+    case TYPE_ISHIFTX:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%k0, %k0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{l}\t%k0";
+      else
+        return "sal{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "*,*,bmi2")
+   (set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+              (const_string "lea")
+            (eq_attr "alternative" "2")
+              (const_string "ishiftx")
+            (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Zero-extending variant of the BMI2 split: drops the flags clobber and
+;; re-expresses the QImode count in SImode.
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (ashift:SI (match_operand:SI 1 "nonimmediate_operand")
+                     (match_operand:QI 2 "register_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+;; HImode left shift.  Alternative 0 is the in-place sal/add; alternative
+;; 1 is typed lea, has "mode" SI and is output as "#".
+(define_insn "*ashlhi3_1"
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")
+        (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
+                   (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{w}\t%0";
+      else
+        return "sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "1")
+              (const_string "lea")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                      (match_operand 0 "register_operand"))
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "HI,SI")])
+
+;; %%% Potential partial reg stall on alternative 1. What to do?
+;; QImode left shift.  Alternative 0 is a byte operation, alternative 1
+;; has "mode" SI and is printed as a 32-bit operation on %k0, and
+;; alternative 2 is typed lea and output as "#".
+(define_insn "*ashlqi3_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
+        (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
+                   (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_LEA:
+      return "#";
+
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      /* A register that is not one of the QI registers is doubled with a
+         32-bit add.  */
+      if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
+        return "add{l}\t%k0, %k0";
+      else
+        return "add{b}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        {
+          if (get_attr_mode (insn) == MODE_SI)
+            return "sal{l}\t%k0";
+          else
+            return "sal{b}\t%0";
+        }
+      else
+        {
+          if (get_attr_mode (insn) == MODE_SI)
+            return "sal{l}\t{%2, %k0|%k0, %2}";
+          else
+            return "sal{b}\t{%2, %0|%0, %2}";
+        }
+    }
+}
+  [(set (attr "type")
+     (cond [(eq_attr "alternative" "2")
+              (const_string "lea")
+            (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                      (match_operand 0 "register_operand"))
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI,SI,SI")])
+
+;; QImode left shift of a strict_low_part destination, in place.
+;; Accepted when optimizing for size, when
+;; TARGET_PARTIAL_FLAG_REG_STALL is off, or for a shift by one when
+;; TARGET_SHIFT1 holds or the add form applies to a register.
+(define_insn "*ashlqi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+        (ashift:QI (match_dup 0)
+                   (match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[1] == const1_rtx
+        && (TARGET_SHIFT1
+            || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[1] == const1_rtx);
+      return "add{b}\t%0, %0";
+
+    default:
+      if (operands[1] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{b}\t%0";
+      else
+        return "sal{b}\t{%1, %0|%0, %1}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                      (match_operand 0 "register_operand"))
+                 (match_operand 1 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift1")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift1")
+                 (and (match_operand 1 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; After reload, a constant left shift whose source and destination are
+;; different hard registers is re-emitted as a multiplication by
+;; 1 << count, without the flags clobber.  Modes narrower than SImode are
+;; first widened to SImode lowparts.
+;; Convert ashift to the lea pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand 0 "register_operand")
+        (ashift (match_operand 1 "index_register_operand")
+                (match_operand:QI 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "GET_MODE (operands[0]) == GET_MODE (operands[1])
+   && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(const_int 0)]
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+  rtx pat;
+
+  if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+    {
+      mode = SImode;
+      operands[0] = gen_lowpart (mode, operands[0]);
+      operands[1] = gen_lowpart (mode, operands[1]);
+    }
+
+  /* The shift count becomes the scale factor of the multiplication.  */
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), mode);
+
+  pat = gen_rtx_MULT (mode, operands[1], operands[2]);
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat));
+  DONE;
+})
+
+;; Convert ashift to the lea pattern to avoid flags dependency.
+;; Zero-extending variant: the SImode constant shift becomes a
+;; multiplication by 1 << count inside the zero_extend, and the flags
+;; clobber is dropped.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (ashift:SI (match_operand:SI 1 "index_register_operand")
+                     (match_operand:QI 2 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed
+   && true_regnum (operands[0]) != true_regnum (operands[1])"
+  [(set (match_dup 0)
+        (zero_extend:DI (mult:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[2] = gen_int_mode (1 << INTVAL (operands[2]), SImode);
+})
+
+;; Left shift by an immediate that both stores the result and compares
+;; it against zero (flags in CCGOCmode).
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags. We assume that shifts by constant
+;; zero are optimized away.
+(define_insn "*ashl<mode>3_cmp"
+  [(set (reg FLAGS_REG)
+        (compare
+          (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+          (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+        (ashift:SWI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+        && (TARGET_SHIFT1
+            || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{<imodesuffix>}\t%0";
+      else
+        return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                      (match_operand 0 "register_operand"))
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; Flag-setting SImode left shift by a constant in 1..31 whose result is
+;; zero-extended to DImode.
+(define_insn "*ashlsi3_cmp_zext"
+  [(set (reg FLAGS_REG)
+        (compare
+          (ashift:SI (match_operand:SI 1 "register_operand" "0")
+                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+           && (TARGET_SHIFT1
+               || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t%k0, %k0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{l}\t%k0";
+      else
+        return "sal{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Flag-setting left shift whose value result is not needed: operand 0
+;; is only a clobbered scratch tied to the input register.
+(define_insn "*ashl<mode>3_cconly"
+  [(set (reg FLAGS_REG)
+        (compare
+          (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
+                      (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+          (const_int 0)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+        && (TARGET_SHIFT1
+            || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{<imodesuffix>}\t%0, %0";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "sal{<imodesuffix>}\t%0";
+      else
+        return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+                      (match_operand 0 "register_operand"))
+                 (match_operand 2 "const1_operand"))
+              (const_string "alu")
+           ]
+           (const_string "ishift")))
+   (set (attr "length_immediate")
+     (if_then_else
+       (ior (eq_attr "type" "alu")
+            (and (eq_attr "type" "ishift")
+                 (and (match_operand 2 "const1_operand")
+                      (ior (match_test "TARGET_SHIFT1")
+                           (match_test "optimize_function_for_size_p (cfun)")))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; See comment above `ashl<mode>3' about how this works.
+
+;; Named expander for the any_shiftrt codes in all SDWIM modes; delegates
+;; to ix86_expand_binary_operator.
+(define_expand "<shift_insn><mode>3"
+  [(set (match_operand:SDWIM 0 "<shift_operand>")
+        (any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>")
+                           (match_operand:QI 2 "nonmemory_operand")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+;; Matches a right shift whose count is (subreg:QI (and:SI reg const)).
+;; The insn condition accepts it only when the constant keeps every bit
+;; of bitsize-1; the output then shifts by %b2 directly, without the AND.
+(define_insn "*<shift_insn><mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+        (any_shiftrt:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "0")
+          (subreg:QI
+            (and:SI
+              (match_operand:SI 2 "register_operand" "c")
+              (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+  return "<shift>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
+;; Double-word right shift, in place.  Always emitted as "#" and lowered
+;; by ix86_split_<shift_insn> without a scratch register.  When
+;; optimizing with peephole2 enabled the split waits for
+;; epilogue_completed instead of reload_completed.
+(define_insn_and_split "*<shift_insn><mode>3_doubleword"
+  [(set (match_operand:DWI 0 "register_operand" "=r")
+        (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
+                         (match_operand:QI 2 "nonmemory_operand" "<S>c")))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "#"
+  "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
+  [(const_int 0)]
+  "ix86_split_<shift_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
+  [(set_attr "type" "multi")])
+
+;; By default we don't ask for a scratch register, because when DWImode
+;; values are manipulated, registers are already at a premium. But if
+;; we have one handy, we won't turn it away.
+
+;; When a half-width (DWIH) scratch register is available, lower the
+;; double-word right shift with ix86_split_<shift_insn> using that
+;; scratch.  Only with TARGET_CMOVE.
+(define_peephole2
+  [(match_scratch:DWIH 3 "r")
+   (parallel [(set (match_operand:<DWI> 0 "register_operand")
+                   (any_shiftrt:<DWI>
+                     (match_operand:<DWI> 1 "register_operand")
+                     (match_operand:QI 2 "nonmemory_operand")))
+              (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_CMOVE"
+  [(const_int 0)]
+  "ix86_split_<shift_insn> (operands, operands[3], <DWI>mode); DONE;")
+
+;; 64-bit shrd: operand 0 (read-write, "+") shifted right by operand 2,
+;; ORed with operand 1 shifted left by 64 - operand 2.
+;; NOTE(review): the destination part is written with ashiftrt, whereas
+;; the shld patterns use lshiftrt for their right-shifted part; an
+;; arithmetic shift would OR sign copies into the high bits -- confirm
+;; that this RTL description is intentional.
+(define_insn "x86_64_shrd"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashiftrt:DI (match_dup 0)
+                  (match_operand:QI 2 "nonmemory_operand" "Jc"))
+                (ashift:DI (match_operand:DI 1 "register_operand" "r")
+                  (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+;; 32-bit counterpart of x86_64_shrd (complement count is 32 - operand 2);
+;; the same review note about ashiftrt applies.
+(define_insn "x86_shrd"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashiftrt:SI (match_dup 0)
+                  (match_operand:QI 2 "nonmemory_operand" "Ic"))
+                (ashift:SI (match_operand:SI 1 "register_operand" "r")
+                  (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+;; DImode arithmetic right shift by exactly 63.  Alternative 0 (source
+;; in "a", destination in "d") is emitted as cqto/cqo; alternative 1 is
+;; an in-place sar.
+(define_insn "ashrdi3_cvt"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
+        (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
+                     (match_operand:QI 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 63
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "@
+   {cqto|cqo}
+   sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "DI")])
+
+;; SImode arithmetic right shift by exactly 31: cltd/cdq for the a->d
+;; alternative, in-place sar otherwise.
+(define_insn "ashrsi3_cvt"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+                     (match_operand:QI 2 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   {cltd|cdq}
+   sar{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "SI")])
+
+;; Same as ashrsi3_cvt with the result zero-extended to DImode; the sar
+;; alternative prints the destination as %k0.
+(define_insn "*ashrsi3_cvt_zext"
+  [(set (match_operand:DI 0 "register_operand" "=*d,r")
+        (zero_extend:DI
+          (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
+                       (match_operand:QI 2 "const_int_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "@
+   {cltd|cdq}
+   sar{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "imovx,ishift")
+   (set_attr "prefix_0f" "0,*")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "modrm" "0,1")
+   (set_attr "mode" "SI")])
+
+;; Branching adjustment step: test the count bit equal to the mode's bit
+;; size and jump over the fix-up when it is clear.  Otherwise copy
+;; operand 1 into operand 0 and replace operand 1 by itself shifted
+;; right arithmetically by bitsize-1, via the ashr<mode>3_cvt pattern.
+(define_expand "x86_shift<mode>_adj_3"
+  [(use (match_operand:SWI48 0 "register_operand"))
+   (use (match_operand:SWI48 1 "register_operand"))
+   (use (match_operand:QI 2 "register_operand"))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2],
+                               GEN_INT (GET_MODE_BITSIZE (<MODE>mode))));
+
+  /* Jump to LABEL when the tested bit is zero.  */
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+                              gen_rtx_LABEL_REF (VOIDmode, label),
+                              pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn (gen_ashr<mode>3_cvt (operands[1], operands[1],
+                                  GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1)));
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+;; BMI2 <shift>x: three-operand right shift with the count in an SWI48
+;; register.  The pattern has no flags clobber.
+(define_insn "*bmi2_<shift_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                           (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "<shift>x\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
+;; General SWI48 right shift.  Alternative 0 is the in-place shift;
+;; alternative 1 is typed ishiftx (bmi2 isa only) and output as "#".
+(define_insn "*<shift_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+        (any_shiftrt:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+          (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "<shift>{<imodesuffix>}\t%0";
+      else
+        return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; After reload on BMI2 targets, rewrite a flags-clobbering right shift
+;; by a register count into the same shift without the clobber.  The
+;; QImode count register is re-expressed in the data mode by gen_lowpart.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+        (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+                           (match_operand:QI 2 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+;; BMI2 <shift>x on SImode with the result zero-extended to DImode;
+;; no FLAGS_REG clobber.  The destination is printed with %k.
+(define_insn "*bmi2_<shift_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                          (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "<shift>x\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+;; SImode right shift whose result is zero-extended to DImode.
+;; Alternative 0 is the in-place form; alternative 1 (isa bmi2, type
+;; ishiftx) outputs "#" and must be split.
+(define_insn "*<shift_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (zero_extend:DI
+          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+                          (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      /* Count 1 is printed without the count operand when TARGET_SHIFT1
+         is set or the function is optimized for size.  */
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "<shift>{l}\t%k0";
+      else
+        return "<shift>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "*,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+;; Zero-extending variant of the shiftx split: after reload on 64-bit
+;; BMI2 targets the flags clobber is dropped and the QImode count
+;; register is re-expressed in SImode.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand")
+                          (match_operand:QI 2 "register_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+;; QImode/HImode right shift, in place (operand 1 is tied to operand 0).
+(define_insn "*<shift_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+        (any_shiftrt:SWI12
+          (match_operand:SWI12 1 "nonimmediate_operand" "0")
+          (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  /* Count 1 is printed without the count operand when TARGET_SHIFT1 is
+     set or the function is optimized for size.  */
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shift>{<imodesuffix>}\t%0";
+  else
+    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "ishift")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; QImode right shift through strict_low_part.  Here the count is
+;; operand 1, not operand 2.  Enabled when optimizing for size, when
+;; !TARGET_PARTIAL_REG_STALL, or for a count of 1 with TARGET_SHIFT1.
+(define_insn "*<shift_insn>qi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+        (any_shiftrt:QI (match_dup 0)
+                        (match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_REG_STALL
+    || (operands[1] == const1_rtx
+        && TARGET_SHIFT1))"
+{
+  if (operands[1] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shift>{b}\t%0";
+  else
+    return "<shift>{b}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "ishift1")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 1 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; This pattern can't accept a variable shift count, since shifts by
+;; zero don't affect the flags.  We assume that shifts by constant
+;; zero are optimized away.
+;;
+;; Right shift by an immediate that both stores the result and sets the
+;; flags from comparing that result with zero (CCGOCmode).
+(define_insn "*<shift_insn><mode>3_cmp"
+  [(set (reg FLAGS_REG)
+        (compare
+          (any_shiftrt:SWI
+            (match_operand:SWI 1 "nonimmediate_operand" "0")
+            (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+          (const_int 0)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+        (any_shiftrt:SWI (match_dup 1) (match_dup 2)))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+        && TARGET_SHIFT1))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shift>{<imodesuffix>}\t%0";
+  else
+    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "ishift")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; As the _cmp pattern above for SImode, with the stored result
+;; zero-extended to DImode; the count is restricted to 1..31.
+(define_insn "*<shift_insn>si3_cmp_zext"
+  [(set (reg FLAGS_REG)
+        (compare
+          (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
+                          (match_operand:QI 2 "const_1_to_31_operand" "I"))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT
+   && (optimize_function_for_size_p (cfun)
+       || !TARGET_PARTIAL_FLAG_REG_STALL
+       || (operands[2] == const1_rtx
+           && TARGET_SHIFT1))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shift>{l}\t%k0";
+  else
+    return "<shift>{l}\t{%2, %k0|%k0, %2}";
+}
+  [(set_attr "type" "ishift")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Flags-only variant: the shifted value is needed just for the compare
+;; with zero, so the destination is a clobbered scratch (operand 0).
+(define_insn "*<shift_insn><mode>3_cconly"
+  [(set (reg FLAGS_REG)
+        (compare
+          (any_shiftrt:SWI
+            (match_operand:SWI 1 "register_operand" "0")
+            (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+          (const_int 0)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+        && TARGET_SHIFT1))
+   && ix86_match_ccmode (insn, CCGOCmode)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<shift>{<imodesuffix>}\t%0";
+  else
+    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "ishift")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; Rotate instructions
+
+;; TImode rotate (64-bit targets): only constant counts accepted by
+;; const_1_to_63_operand are expanded, via the doubleword pattern;
+;; anything else FAILs.
+(define_expand "<rotate_insn>ti3"
+  [(set (match_operand:TI 0 "register_operand")
+        (any_rotate:TI (match_operand:TI 1 "register_operand")
+                       (match_operand:QI 2 "nonmemory_operand")))]
+  "TARGET_64BIT"
+{
+  if (const_1_to_63_operand (operands[2], VOIDmode))
+    emit_insn (gen_ix86_<rotate_insn>ti3_doubleword
+               (operands[0], operands[1], operands[2]));
+  else
+    FAIL;
+
+  DONE;
+})
+
+;; DImode rotate: a plain binary operator on 64-bit targets; otherwise
+;; only constant counts accepted by const_1_to_31_operand are expanded,
+;; via the doubleword pattern, and anything else FAILs.
+(define_expand "<rotate_insn>di3"
+  [(set (match_operand:DI 0 "shiftdi_operand")
+        (any_rotate:DI (match_operand:DI 1 "shiftdi_operand")
+                       (match_operand:QI 2 "nonmemory_operand")))]
+  ""
+{
+  if (TARGET_64BIT)
+    ix86_expand_binary_operator (<CODE>, DImode, operands);
+  else if (const_1_to_31_operand (operands[2], VOIDmode))
+    emit_insn (gen_ix86_<rotate_insn>di3_doubleword
+               (operands[0], operands[1], operands[2]));
+  else
+    FAIL;
+
+  DONE;
+})
+
+;; Rotates for the SWIM124 modes go straight to
+;; ix86_expand_binary_operator.
+(define_expand "<rotate_insn><mode>3"
+  [(set (match_operand:SWIM124 0 "nonimmediate_operand")
+        (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand")
+                            (match_operand:QI 2 "nonmemory_operand")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Avoid useless masking of count operand.
+;; Matches a count of the form (subreg:QI (and:SI reg const) 0) when the
+;; constant has all bits of bitsize-1 set, and emits the rotate with the
+;; unmasked register (printed via %b2).
+(define_insn "*<rotate_insn><mode>3_mask"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
+        (any_rotate:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "0")
+          (subreg:QI
+            (and:SI
+              (match_operand:SI 2 "register_operand" "c")
+              (match_operand:SI 3 "const_int_operand" "n")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
+{
+  return "<rotate>{<imodesuffix>}\t{%b2, %0|%0, %b2}";
+}
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "<MODE>")])
+
+;; Implement rotation using two double-precision
+;; shift instructions and a scratch register.
+
+;; Double-word rotate left by an immediate.  Always output as "#" and
+;; split after reload.  The split saves one half (operand 4) in the
+;; scratch (operand 3), then updates each half as
+;;   half = (half << n) | (other_half >> (bitsize - n)),
+;; with the second update reading the saved copy from the scratch.
+(define_insn_and_split "ix86_rotl<dwi>3_doubleword"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+        (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+                      (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:DWIH 3 "=&r"))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 3) (match_dup 4))
+   (parallel
+     [(set (match_dup 4)
+           (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
+                     (lshiftrt:DWIH (match_dup 5)
+                                    (minus:QI (match_dup 6) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 5)
+           (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
+                     (lshiftrt:DWIH (match_dup 3)
+                                    (minus:QI (match_dup 6) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  /* Operand 6 is the bit size of one half.  */
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  /* Operands 4 and 5 are the two halves of operand 0.  */
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+;; Double-word rotate right by an immediate; same scheme as the rotl
+;; pattern with the shift directions swapped.  The right shift is written
+;; as ashiftrt, the same form the x86_shrd / x86_64_shrd patterns use.
+(define_insn_and_split "ix86_rotr<dwi>3_doubleword"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+        (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+                        (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:DWIH 3 "=&r"))]
+  ""
+  "#"
+  "reload_completed"
+  [(set (match_dup 3) (match_dup 4))
+   (parallel
+     [(set (match_dup 4)
+           (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+                     (ashift:DWIH (match_dup 5)
+                                  (minus:QI (match_dup 6) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 5)
+           (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+                     (ashift:DWIH (match_dup 3)
+                                  (minus:QI (match_dup 6) (match_dup 2)))))
+      (clobber (reg:CC FLAGS_REG))])]
+{
+  /* Operand 6 is the bit size of one half.  */
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  /* Operands 4 and 5 are the two halves of operand 0.  */
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
+})
+
+;; BMI2 rorx: rotate right by an immediate into a separate destination,
+;; with no FLAGS_REG clobber.
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                        (match_operand:QI 2 "immediate_operand" "<S>")))]
+  "TARGET_BMI2"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "<MODE>")])
+
+;; Generic SImode/DImode rotate.  Alternative 0 is the in-place form;
+;; alternative 1 (isa bmi2, type rotatex, immediate count only) outputs
+;; "#" and must be split.
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+        (any_rotate:SWI48
+          (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+          (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      /* Count 1 is printed without the count operand when TARGET_SHIFT1
+         is set or the function is optimized for size.  */
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "<rotate>{<imodesuffix>}\t%0";
+      else
+        return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set_attr "isa" "*,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   ;; Only the "rotate" alternative can drop its immediate.
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+            (and (match_operand 2 "const1_operand")
+                 (ior (match_test "TARGET_SHIFT1")
+                      (match_test "optimize_function_for_size_p (cfun)"))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+;; After reload on BMI2 targets, a rotate LEFT by an immediate becomes a
+;; flag-free rotate RIGHT by (bitsize - count).
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+        (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+                      (match_operand:QI 2 "immediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
+})
+
+;; A rotate right by an immediate only needs its flags clobber dropped.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand")
+        (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+                        (match_operand:QI 2 "immediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (rotatert:SWI48 (match_dup 1) (match_dup 2)))])
+
+;; BMI2 rorx on SImode with the result zero-extended to DImode.
+(define_insn "*bmi2_rorxsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI
+          (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+                       (match_operand:QI 2 "immediate_operand" "I"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "SI")])
+
+;; SImode rotate whose result is zero-extended to DImode.  Alternative 1
+;; (isa bmi2, type rotatex) outputs "#" and must be split.
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+        (zero_extend:DI
+          (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+                         (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+          && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+        return "<rotate>{l}\t%k0";
+      else
+        return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "*,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+            (and (match_operand 2 "const1_operand")
+                 (ior (match_test "TARGET_SHIFT1")
+                      (match_test "optimize_function_for_size_p (cfun)"))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+;; Zero-extending variant: rotate left by N becomes rotate right by
+;; (32 - N).
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (rotate:SI (match_operand:SI 1 "nonimmediate_operand")
+                     (match_operand:QI 2 "immediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
+})
+
+;; Zero-extending rotate right: only the flags clobber is dropped.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (rotatert:SI (match_operand:SI 1 "nonimmediate_operand")
+                       (match_operand:QI 2 "immediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+        (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
+
+;; QImode/HImode rotate, in place.
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+        (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
+                          (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
+  else
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "rotate")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; QImode rotate through strict_low_part; the count is operand 1.
+(define_insn "*<rotate_insn>qi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+        (any_rotate:QI (match_dup 0)
+                       (match_operand:QI 1 "nonmemory_operand" "cI")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_REG_STALL
+    || (operands[1] == const1_rtx
+        && TARGET_SHIFT1))"
+{
+  if (operands[1] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{b}\t%0";
+  else
+    return "<rotate>{b}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "rotate1")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 1 "const1_operand")
+            (ior (match_test "TARGET_SHIFT1")
+                 (match_test "optimize_function_for_size_p (cfun)")))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; An HImode rotate by 8 in either direction is rewritten after reload
+;; as a bswap:HI stored through strict_low_part, when TARGET_USE_XCHGB
+;; is set or the function is optimized for size.
+(define_split
+  [(set (match_operand:HI 0 "register_operand")
+        (any_rotate:HI (match_dup 0) (const_int 8)))
+   (clobber (reg:CC FLAGS_REG))]
+  "reload_completed
+   && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
+  [(parallel [(set (strict_low_part (match_dup 0))
+                   (bswap:HI (match_dup 0)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Bit set / bit test instructions
+
+;; Signed bit-field extract.  Only the width-8, position-8 field of an
+;; ext_register_operand is accepted; every other request FAILs.
+(define_expand "extv"
+  [(set (match_operand:SI 0 "register_operand")
+        (sign_extract:SI (match_operand:SI 1 "register_operand")
+                         (match_operand:SI 2 "const8_operand")
+                         (match_operand:SI 3 "const8_operand")))]
+  ""
+{
+  /* Handle extractions from %ah et al.  */
+  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+    FAIL;
+
+  /* From mips.md: extract_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[1], VOIDmode))
+    FAIL;
+})
+
+;; Unsigned bit-field extract, with the same restrictions as extv.
+(define_expand "extzv"
+  [(set (match_operand:SI 0 "register_operand")
+        (zero_extract:SI (match_operand 1 "ext_register_operand")
+                         (match_operand:SI 2 "const8_operand")
+                         (match_operand:SI 3 "const8_operand")))]
+  ""
+{
+  /* Handle extractions from %ah et al.  */
+  if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8)
+    FAIL;
+
+  /* From mips.md: extract_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[1], VOIDmode))
+    FAIL;
+})
+
+;; Bit-field insert.  ix86_expand_pinsr gets the first chance; otherwise
+;; only the width-8, position-8 field of an ext_register_operand is
+;; handled, through movdi_insv_1 (64-bit) or movsi_insv_1.
+(define_expand "insv"
+  [(set (zero_extract (match_operand 0 "register_operand")
+                      (match_operand 1 "const_int_operand")
+                      (match_operand 2 "const_int_operand"))
+        (match_operand 3 "register_operand"))]
+  ""
+{
+  rtx (*gen_mov_insv_1) (rtx, rtx);
+
+  if (ix86_expand_pinsr (operands))
+    DONE;
+
+  /* Handle insertions to %ah et al.  */
+  if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
+    FAIL;
+
+  /* From mips.md: insert_bit_field doesn't verify that our source
+     matches the predicate, so check it again here.  */
+  if (! ext_register_operand (operands[0], VOIDmode))
+    FAIL;
+
+  gen_mov_insv_1 = (TARGET_64BIT
+                    ? gen_movdi_insv_1 : gen_movsi_insv_1);
+
+  emit_insn (gen_mov_insv_1 (operands[0], operands[3]));
+  DONE;
+})
+
+;; %%% bts, btr, btc, bt.
+;; In general these instructions are *slow* when applied to memory,
+;; since they enforce atomic operation.  When applied to registers,
+;; it depends on the cpu implementation.  They're never faster than
+;; the corresponding and/ior/xor operations, so with 32-bit there's
+;; no point.  But in 64-bit, we can't hold the relevant immediates
+;; within the instruction itself, so operating on bits in the high
+;; 32-bits of a register becomes easier.
+;;
+;; These are slow on Nocona, but fast on Athlon64.  We do require the use
+;; of btrq and btcq for corner cases of post-reload expansion of absdf and
+;; negdf respectively, so they can never be disabled entirely.
+
+;; Set one bit: the 1-bit field of operand 0 at constant position
+;; operand 1 (0..63) is set to 1.  Available when TARGET_USE_BT holds
+;; and, unconditionally, after reload.
+(define_insn "*btsq"
+  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+                         (const_int 1)
+                         (match_operand:DI 1 "const_0_to_63_operand"))
+        (const_int 1))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+  "bts{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
+
+;; Clear one bit: same field as *btsq, set to 0.
+(define_insn "*btrq"
+  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+                         (const_int 1)
+                         (match_operand:DI 1 "const_0_to_63_operand"))
+        (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+  "btr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
+
+;; Complement one bit: the field is replaced by the NOT of itself.
+(define_insn "*btcq"
+  [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
+                         (const_int 1)
+                         (match_operand:DI 1 "const_0_to_63_operand"))
+        (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
+  "btc{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
+
+;; Allow Nocona to avoid these instructions if a register is available.
+
+;; Bit set without bts: when !TARGET_USE_BT and a DImode scratch is
+;; free, OR operand 0 with a mask that has only bit operand 1 set.
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+                     (match_operand:DI 0 "register_operand")
+                     (const_int 1)
+                     (match_operand:DI 1 "const_0_to_63_operand"))
+                   (const_int 1))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  /* Build 1 << i as a (lo, hi) pair so that hosts whose HOST_WIDE_INT
+     is narrower than 64 bits can still describe bits 32..63.  */
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (lo, hi, DImode);
+  /* 1 << 31 and larger masks do not fit a sign-extended 32-bit
+     immediate, so they are loaded into the scratch register first.  */
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_iordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+;; Bit clear without btr: AND operand 0 with the complement of the
+;; single-bit mask.
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+                     (match_operand:DI 0 "register_operand")
+                     (const_int 1)
+                     (match_operand:DI 1 "const_0_to_63_operand"))
+                   (const_int 0))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (~lo, ~hi, DImode);
+  /* The mask is ~(1 << i).  For i == 31 that is 0xffffffff7fffffff,
+     which lies just below the smallest sign-extended 32-bit immediate
+     (0xffffffff80000000), so bit 31 needs the scratch register exactly
+     like bits 32..63.  Testing "i >= 32" here would hand an
+     unencodable immediate to the AND.  */
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_anddi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+;; Bit complement without btc: XOR operand 0 with the single-bit mask.
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (parallel [(set (zero_extract:DI
+                     (match_operand:DI 0 "register_operand")
+                     (const_int 1)
+                     (match_operand:DI 1 "const_0_to_63_operand"))
+              (not:DI (zero_extract:DI
+                        (match_dup 0) (const_int 1) (match_dup 1))))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && !TARGET_USE_BT"
+  [(const_int 0)]
+{
+  HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo;
+  rtx op1;
+
+  if (HOST_BITS_PER_WIDE_INT >= 64)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
+
+  op1 = immed_double_const (lo, hi, DImode);
+  /* As for the OR case: masks from 1 << 31 upwards go through the
+     scratch register.  */
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+;; Bit test: set the CCC-mode flags from the 1-bit field of operand 0 at
+;; position operand 1 (register or "N" constant), compared with zero.
+(define_insn "*bt<mode>"
+  [(set (reg:CCC FLAGS_REG)
+        (compare:CCC
+          (zero_extract:SWI48
+            (match_operand:SWI48 0 "register_operand" "r")
+            (const_int 1)
+            (match_operand:SWI48 1 "x86_64_nonmemory_operand" "rN"))
+          (const_int 0)))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "bt{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "<MODE>")])
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
+
+;; DImode store-flag.  Output as "#" and split after reload into a
+;; QImode setcc on the low part of operand 0 followed by a zero_extend.
+(define_insn_and_split "*setcc_di_1"
+  [(set (match_operand:DI 0 "register_operand" "=q")
+        (match_operator:DI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))]
+  "TARGET_64BIT && !TARGET_PARTIAL_REG_STALL"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (zero_extend:DI (match_dup 2)))]
+{
+  /* Re-mode the comparison to QImode in place; operand 2 is the QImode
+     low part of the destination.  */
+  PUT_MODE (operands[1], QImode);
+  operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+;; SImode store-flag for TARGET_ZERO_EXTEND_WITH_AND when optimizing for
+;; speed: the zero_extend produced by the split carries a FLAGS_REG
+;; clobber, so the insn itself declares the clobber too.
+(define_insn_and_split "*setcc_si_1_and"
+  [(set (match_operand:SI 0 "register_operand" "=q")
+        (match_operator:SI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_PARTIAL_REG_STALL
+   && TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  PUT_MODE (operands[1], QImode);
+  operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+;; SImode store-flag for the complementary case (no
+;; TARGET_ZERO_EXTEND_WITH_AND, or optimizing for size): the zero_extend
+;; has no flags clobber.
+(define_insn_and_split "*setcc_si_1_movzbl"
+  [(set (match_operand:SI 0 "register_operand" "=q")
+        (match_operator:SI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))]
+  "!TARGET_PARTIAL_REG_STALL
+   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+{
+  PUT_MODE (operands[1], QImode);
+  operands[2] = gen_lowpart (QImode, operands[0]);
+})
+
+;; The actual setcc instruction: store the QImode result of a flags
+;; comparison into a byte register or memory.
+(define_insn "*setcc_qi"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+        (match_operator:QI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))]
+  ""
+  "set%C1\t%0"
+  [(set_attr "type" "setcc")
+   (set_attr "mode" "QI")])
+
+;; As *setcc_qi, writing through strict_low_part.
+(define_insn "*setcc_qi_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+        (match_operator:QI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))]
+  ""
+  "set%C1\t%0"
+  [(set_attr "type" "setcc")
+   (set_attr "mode" "QI")])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one.  Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;;	seta	%al
+;;	testb	%al, %al
+;;	sete	%al
+
+;; (ne (cond flags 0) 0) is just the condition itself, re-moded to QImode.
+(define_split
+  [(set (match_operand:QI 0 "nonimmediate_operand")
+        (ne:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+            (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+  "PUT_MODE (operands[1], QImode);")
+
+;; The same simplification for a strict_low_part destination.
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand"))
+        (ne:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+            (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+  "PUT_MODE (operands[1], QImode);")
+
+;; (eq (cond flags 0) 0) is the reversed condition.  The reversal is done
+;; on a copy of the comparison, and the split FAILs if the reversed code
+;; is not accepted by ix86_comparison_operator.
+(define_split
+  [(set (match_operand:QI 0 "nonimmediate_operand")
+        (eq:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+            (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+                                             GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+;; The same reversal for a strict_low_part destination.
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand"))
+        (eq:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+            (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+                                             GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+;; The SSE store-flag instructions save 0 or 0xffffffff to the result;
+;; subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is NaN, but not in normalized form, so we can't represent
+;; it directly.
+
+;; Alternative 0 (isa noavx) is the two-operand cmp form with operand 1
+;; tied to the destination; alternative 1 (isa avx) is the three-operand
+;; vcmp form.
+(define_insn "setcc_<mode>_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+        (match_operator:MODEF 3 "sse_comparison_operator"
+          [(match_operand:MODEF 1 "register_operand" "0,x")
+           (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)"
+  "@
+   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
+   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+;; Conditional jump to label operand 0 when the flags comparison
+;; (operand 1) holds.  The length attribute is 2 when the displacement
+;; label - pc lies in [-126, 128), otherwise 6.
+(define_insn "*jcc_1"
+  [(set (pc)
+        (if_then_else (match_operator 1 "ix86_comparison_operator"
+                        [(reg FLAGS_REG) (const_int 0)])
+                      (label_ref (match_operand 0))
+                      (pc)))]
+  ""
+  "%+j%C1\t%l0"
+  [(set_attr "type" "ibr")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 0) (pc))
+                               (const_int -126))
+                           (lt (minus (match_dup 0) (pc))
+                               (const_int 128)))
+          (const_int 2)
+          (const_int 6)))])
+
+;; As *jcc_1 with the arms swapped: the jump is taken when the
+;; comparison does NOT hold, so the condition is printed with %c
+;; (reversed) instead of %C.
+(define_insn "*jcc_2"
+  [(set (pc)
+        (if_then_else (match_operator 1 "ix86_comparison_operator"
+                        [(reg FLAGS_REG) (const_int 0)])
+                      (pc)
+                      (label_ref (match_operand 0))))]
+  ""
+  "%+j%c1\t%l0"
+  [(set_attr "type" "ibr")
+   (set_attr "modrm" "0")
+   (set (attr "length")
+        (if_then_else (and (ge (minus (match_dup 0) (pc))
+                               (const_int -126))
+                           (lt (minus (match_dup 0) (pc))
+                               (const_int 128)))
+          (const_int 2)
+          (const_int 6)))])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one.  Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;;	seta	%al
+;;	testb	%al, %al
+;;	je	Lfoo
+
+;; Branch on (ne (cond flags 0) 0): branch on the condition itself,
+;; re-moded to VOIDmode.
+(define_split
+  [(set (pc)
+        (if_then_else (ne (match_operator 0 "ix86_comparison_operator"
+                            [(reg FLAGS_REG) (const_int 0)])
+                          (const_int 0))
+                      (label_ref (match_operand 1))
+                      (pc)))]
+  ""
+  [(set (pc)
+        (if_then_else (match_dup 0)
+                      (label_ref (match_dup 1))
+                      (pc)))]
+  "PUT_MODE (operands[0], VOIDmode);")
+
+;; Branch on (eq (cond flags 0) 0): branch on the reversed condition.
+;; The reversal is done on a copy, and the split FAILs if the reversed
+;; code is not accepted by ix86_comparison_operator.
+(define_split
+  [(set (pc)
+        (if_then_else (eq (match_operator 0 "ix86_comparison_operator"
+                            [(reg FLAGS_REG) (const_int 0)])
+                          (const_int 0))
+                      (label_ref (match_operand 1))
+                      (pc)))]
+  ""
+  [(set (pc)
+        (if_then_else (match_dup 0)
+                      (label_ref (match_dup 1))
+                      (pc)))]
+{
+  rtx new_op0 = copy_rtx (operands[0]);
+  operands[0] = new_op0;
+  PUT_MODE (new_op0, VOIDmode);
+  PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
+                                             GET_MODE (XEXP (new_op0, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op0, VOIDmode))
+    FAIL;
+})
+
+;; zero_extend in SImode is correct also for DImode, since this is what combine
+;; pass generates from shift insn with QImode operand.  Actually, the mode
+;; of operand 2 (bit offset operand) doesn't matter since bt insn takes
+;; appropriate modulo of the bit offset value.
+
+;; Branch on a single bit selected by a zero-extended QImode register.
+;; Split unconditionally ("&& 1") into a CCC-mode bit test followed by a
+;; jump on the flags.
+(define_insn_and_split "*jcc_bt<mode>"
+  [(set (pc)
+        (if_then_else (match_operator 0 "bt_comparison_operator"
+                        [(zero_extract:SWI48
+                           (match_operand:SWI48 1 "register_operand" "r")
+                           (const_int 1)
+                           (zero_extend:SI
+                             (match_operand:QI 2 "register_operand" "r")))
+                         (const_int 0)])
+                      (label_ref (match_operand 3))
+                      (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+        (compare:CCC
+          (zero_extract:SWI48
+            (match_dup 1)
+            (const_int 1)
+            (match_dup 2))
+          (const_int 0)))
+   (set (pc)
+        (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                      (label_ref (match_dup 3))
+                      (pc)))]
+{
+  /* Re-express the QImode bit offset register in the data mode.  */
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0);
+
+  /* The comparison code is reversed in place before match_op_dup
+     re-applies it to the CCC flags.
+     NOTE(review): presumably because the bit test reports the selected
+     bit in the carry flag -- confirm against the CCCmode handling.  */
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; Like *jcc_bt<mode>, but expect a SImode operand 2 instead of QImode
+;; zero extended to SImode.
+(define_insn_and_split "*jcc_bt<mode>_1" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand" "r") + (const_int 1) + (match_operand:SI 2 "register_operand" "r")) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; Avoid useless masking of bit offset operand. "and" in SImode is correct +;; also for DImode, this is what combine produces. +;; The mask (operand 3) must keep every low bit that selects a bit position +;; within the mode. The comparison must be against (const_int 0), exactly as +;; in *jcc_bt<mode>_1: the split always emits a compare against zero, so the +;; second operand of the matched comparison may not be left unconstrained. +(define_insn_and_split "*jcc_bt<mode>_mask" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand" "r") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n"))) + (const_int 0)]) + (label_ref (match_operand 4)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; Match the ((x >> n) & 1) form of a single-bit test against zero and split +;; it into the same CCC-mode compare and jump on the reversed condition code. +(define_insn_and_split "*jcc_btsi_1" + [(set (pc) + (if_then_else (match_operator 0 
"bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +;; (the mask, operand 3, must keep the low five bits: 0x1f) +(define_insn_and_split "*jcc_btsi_mask_1" + [(set (pc) + (if_then_else + (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) 0)) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 4)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x1f) == 0x1f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] + "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") + +;; Define combination compare-and-branch fp compare instructions to help +;; combine. 
+ +;; The i387 patterns below only output "#"; the define_splits that follow +;; them expand them after reload through ix86_split_fp_branch. The "_r" +;; variants have the label in the else arm. +(define_insn "*jcc<mode>_0_i387" + [(set (pc) + (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator" + [(match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "const0_operand")]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +(define_insn "*jcc<mode>_0_r_i387" + [(set (pc) + (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator" + [(match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "const0_operand")]) + (pc) + (label_ref (match_operand 3)))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +(define_insn "*jccxf_i387" + [(set (pc) + (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator" + [(match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f")]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +(define_insn "*jccxf_r_i387" + [(set (pc) + (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator" + [(match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3)))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +(define_insn "*jcc<mode>_i387" + [(set (pc) + (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator" + [(match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && 
!TARGET_CMOVE" + "#") + +(define_insn "*jcc<mode>_r_i387" + [(set (pc) + (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator" + [(match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]) + (pc) + (label_ref (match_operand 3)))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +(define_insn "*jccu<mode>_i387" + [(set (pc) + (if_then_else (match_operator:CCFPU 0 "ix86_fp_comparison_operator" + [(match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "register_operand" "f")]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +(define_insn "*jccu<mode>_r_i387" + [(set (pc) + (if_then_else (match_operator:CCFPU 0 "ix86_fp_comparison_operator" + [(match_operand:X87MODEF 1 "register_operand" "f") + (match_operand:X87MODEF 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3)))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 && !TARGET_CMOVE" + "#") + +;; After reload, expand an fp compare-and-branch that has no scratch clobber; +;; NULL_RTX is passed where the next split passes its HImode scratch. +(define_split + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand:X87MODEF 1 "register_operand") + (match_operand:X87MODEF 2 "nonimmediate_operand")]) + (match_operand 3) + (match_operand 4))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_80387 && !TARGET_CMOVE + && reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], NULL_RTX); + DONE; +}) + +;; Likewise, with an HImode scratch (operand 5). +(define_split + [(set (pc) + (if_then_else (match_operator 0 "ix86_fp_comparison_operator" + [(match_operand:X87MODEF 1 "register_operand") + (match_operand:X87MODEF 2 "general_operand")]) + (match_operand 3) + (match_operand 4))) + 
(clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5))] + "TARGET_80387 && !TARGET_CMOVE + && reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], operands[5]); + DONE; +}) + +;; The order of operands in *jcc<fp>_<int>_i387 is forced by combine in +;; simplify_comparison () function. Float operator is treated as RTX_OBJ +;; with a precedence over other operators and is always put in the first +;; place. Swap condition and operands to match ficom instruction. + +(define_insn "*jcc<X87MODEF:mode>_<SWI24:mode>_i387" + [(set (pc) + (if_then_else + (match_operator:CCFP 0 "ix86_swapped_fp_comparison_operator" + [(match_operator:X87MODEF 1 "float_operator" + [(match_operand:SWI24 2 "nonimmediate_operand" "m")]) + (match_operand:X87MODEF 3 "register_operand" "f")]) + (label_ref (match_operand 4)) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a"))] + "TARGET_80387 && !TARGET_CMOVE + && (TARGET_USE_<SWI24:MODE>MODE_FIOP + || optimize_function_for_size_p (cfun))" + "#") + +(define_insn "*jcc<X87MODEF:mode>_<SWI24:mode>_r_i387" + [(set (pc) + (if_then_else + (match_operator:CCFP 0 "ix86_swapped_fp_comparison_operator" + [(match_operator:X87MODEF 1 "float_operator" + [(match_operand:SWI24 2 "nonimmediate_operand" "m")]) + (match_operand:X87MODEF 3 "register_operand" "f")]) + (pc) + (label_ref (match_operand 4)))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a"))] + "TARGET_80387 && !TARGET_CMOVE + && (TARGET_USE_<SWI24:MODE>MODE_FIOP + || optimize_function_for_size_p (cfun))" + "#") + +;; After reload, expand the swapped float-from-integer form: the condition +;; is swapped and the register operand (operand 3) is passed first. +(define_split + [(set (pc) + (if_then_else + (match_operator:CCFP 0 "ix86_swapped_fp_comparison_operator" + [(match_operator:X87MODEF 1 "float_operator" + [(match_operand:SWI24 2 "memory_operand")]) + (match_operand:X87MODEF 3 "register_operand")]) + 
(match_operand 4) + (match_operand 5))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6))] + "TARGET_80387 && !TARGET_CMOVE + && reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), operands[3], + gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]), + operands[4], operands[5], operands[6]); + DONE; +}) + +;; Unconditional and other jump instructions + +;; Direct jump to label 0. The length attribute is 2 when the target lies in +;; [pc - 126, pc + 128), otherwise 5. +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0)))] + "" + "jmp\t%l0" + [(set_attr "type" "ibr") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 5))) + (set_attr "modrm" "0")]) + +;; For TARGET_X32 the jump target is converted to word_mode first. +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "indirect_branch_operand"))] + "" +{ + if (TARGET_X32) + operands[0] = convert_memory_address (word_mode, operands[0]); +}) + +(define_insn "*indirect_jump" + [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rw"))] + "" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand 0 "indirect_branch_operand")) + (use (label_ref (match_operand 1)))])] + "" +{ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. Convert the relative address to an absolute address. */ + if (flag_pic) + { + rtx op0, op1; + enum rtx_code code; + + /* We can't use @GOTOFF for text labels on VxWorks; + see gotoff_operand. 
*/ + if (TARGET_64BIT || TARGET_VXWORKS_RTP) + { + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) + { + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; + } + else + { + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; + } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); + } + + if (TARGET_X32) + operands[0] = convert_memory_address (word_mode, operands[0]); +}) + +(define_insn "*tablejump_1" + [(set (pc) (match_operand:W 0 "indirect_branch_operand" "rw")) + (use (label_ref (match_operand 1)))] + "" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +;; Convert setcc + movzbl to xor + setcc if operands don't overlap. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0)) + (set (match_operand:QI 1 "register_operand") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (set (match_operand 3 "q_regs_operand") + (zero_extend (match_dup 1)))] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Likewise, when the flags setter is an element of a parallel (operand 4 is +;; the other element). +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) (match_operand 0)) + (match_operand 4)]) + (set (match_operand:QI 1 "register_operand") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (set (match_operand 3 "q_regs_operand") + (zero_extend (match_dup 1)))] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! 
reg_overlap_mentioned_p (operands[3], operands[0])" + [(parallel [(set (match_dup 5) (match_dup 0)) + (match_dup 4)]) + (set (strict_low_part (match_dup 6)) + (match_dup 2))] +{ + operands[5] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[6] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Similar, but match zero extend with andsi3. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0)) + (set (match_operand:QI 1 "register_operand") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (parallel [(set (match_operand:SI 3 "q_regs_operand") + (and:SI (match_dup 3) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])] + "REGNO (operands[1]) == REGNO (operands[3]) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Likewise for a zero_extend that clobbers the flags, with the flags setter +;; inside a parallel. +(define_peephole2 + [(parallel [(set (reg FLAGS_REG) (match_operand 0)) + (match_operand 4)]) + (set (match_operand:QI 1 "register_operand") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (parallel [(set (match_operand 3 "q_regs_operand") + (zero_extend (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(parallel [(set (match_dup 5) (match_dup 0)) + (match_dup 4)]) + (set (strict_low_part (match_dup 6)) + (match_dup 2))] +{ + operands[5] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[6] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Call instructions. + +;; The predicates normally associated with named expanders are not properly +;; checked for calls. 
This is a bug in the generic code, but it isn't that +;; easy to fix. Ignore it for now and be prepared to fix things up. + +;; P6 processors will jump to the address after the decrement when %esp +;; is used as a call operand, so they will execute the return address as code. +;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17. + +;; Register constraint for call instruction. +(define_mode_attr c [(SI "l") (DI "r")]) + +;; Call subroutine returning no value. + +(define_expand "call" + [(call (match_operand:QI 0) + (match_operand 1)) + (use (match_operand 2))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], + operands[2], NULL, false); + DONE; +}) + +;; Same as "call", except that true is passed as the last argument of +;; ix86_expand_call. +(define_expand "sibcall" + [(call (match_operand:QI 0) + (match_operand 1)) + (use (match_operand 2))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], + operands[2], NULL, true); + DONE; +}) + +;; The insns below leave the assembly output to ix86_output_call_insn; the +;; call and sibcall forms are told apart by SIBLING_CALL_P (insn). +(define_insn "*call" + [(call (mem:QI (match_operand:W 0 "call_insn_operand" "<c>zw")) + (match_operand 1))] + "!SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[0]);" + [(set_attr "type" "call")]) + +(define_insn "*call_rex64_ms_sysv" + [(match_parallel 2 "call_rex64_ms_sysv_operation" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzw")) + (match_operand 1)) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)])] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[0]);" + [(set_attr "type" "call")]) + +(define_insn "*sibcall" + [(call (mem:QI (match_operand:W 0 "sibcall_insn_operand" "Uz")) + (match_operand 1))] + "SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[0]);" + [(set_attr "type" "call")]) + +;; 32-bit only: call that also adds operand 3 to the stack pointer. +(define_expand "call_pop" + [(parallel [(call (match_operand:QI 0) + (match_operand:SI 1)) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3)))])] + "!TARGET_64BIT" +{ + ix86_expand_call (NULL, operands[0], operands[1], + operands[2], operands[3], false); + DONE; +}) + 
+;; 32-bit only: call insns that also add immediate operand 2 to the stack +;; pointer. +(define_insn "*call_pop" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) + (match_operand 1)) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[0]);" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_pop" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "Uz")) + (match_operand 1)) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[0]);" + [(set_attr "type" "call")]) + +;; Call subroutine, returning value in operand 0 + +(define_expand "call_value" + [(set (match_operand 0) + (call (match_operand:QI 1) + (match_operand 2))) + (use (match_operand 3))] + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], NULL, false); + DONE; +}) + +;; Same as "call_value", except that true is passed as the last argument of +;; ix86_expand_call. +(define_expand "sibcall_value" + [(set (match_operand 0) + (call (match_operand:QI 1) + (match_operand 2))) + (use (match_operand 3))] + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], NULL, true); + DONE; +}) + +(define_insn "*call_value" + [(set (match_operand 0) + (call (mem:QI (match_operand:W 1 "call_insn_operand" "<c>zw")) + (match_operand 2)))] + "!SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[1]);" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value" + [(set (match_operand 0) + (call (mem:QI (match_operand:W 1 "sibcall_insn_operand" "Uz")) + (match_operand 2)))] + "SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[1]);" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_rex64_ms_sysv" + [(match_parallel 3 "call_rex64_ms_sysv_operation" + [(set (match_operand 0) + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzw")) + (match_operand 2))) + (unspec [(const_int 0)] 
UNSPEC_MS_TO_SYSV_CALL)])] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[1]);" + [(set_attr "type" "callv")]) + +;; 32-bit only: value-returning call that also adds operand 4 to the stack +;; pointer. +(define_expand "call_value_pop" + [(parallel [(set (match_operand 0) + (call (match_operand:QI 1) + (match_operand:SI 2))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 4)))])] + "!TARGET_64BIT" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4], false); + DONE; +}) + +(define_insn "*call_value_pop" + [(set (match_operand 0) + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm")) + (match_operand 2))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[1]);" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_pop" + [(set (match_operand 0) + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "Uz")) + (match_operand 2))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "* return ix86_output_call_insn (insn, operands[1]);" + [(set_attr "type" "callv")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0) + (const_int 0)) + (match_operand 1) + (match_operand 2)])] + "" +{ + int i; + + /* In order to give reg-stack an easier job in validating two + coprocessor registers as containing a possible return value, + simply pretend the untyped call returns a complex long double + value. + + We can't use SSE_REGPARM_MAX here since callee is unprototyped + and should have the default ABI. */ + + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, + GEN_INT ((TARGET_64BIT + ? (ix86_abi == SYSV_ABI + ? 
X86_64_SSE_REGPARM_MAX + : X86_64_MS_SSE_REGPARM_MAX) + : X86_32_SSE_REGPARM_MAX) + - 1), + NULL, false); + + /* Emit one move per SET in operand 2 (destination <- source). */ + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; Prologue and epilogue instructions + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Do not schedule instructions accessing memory across this point. + +(define_expand "memory_blockage" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_blockage" + [(set (match_operand:BLK 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" + "" + [(set_attr "length" "0")]) + +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for PIC code +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)] + "" + "" + [(set_attr "length" "0")]) + +;; Insn emitted into the body of a function to return from a function. +;; This is only done if the function's epilogue is known to be simple. +;; See comments for ix86_can_use_return_insn_p in i386.c. 
+ +(define_expand "return" + [(simple_return)] + "ix86_can_use_return_insn_p ()" +{ + /* When crtl->args.pops_args is nonzero, emit simple_return_pop_internal + with that count instead of the plain return. */ + if (crtl->args.pops_args) + { + rtx popc = GEN_INT (crtl->args.pops_args); + emit_jump_insn (gen_simple_return_pop_internal (popc)); + DONE; + } +}) + +;; We need to disable this for TARGET_SEH, as otherwise +;; shrink-wrapped prologue gets enabled too. This might exceed +;; the maximum size of prologue in unwind information. + +(define_expand "simple_return" + [(simple_return)] + "!TARGET_SEH" +{ + /* Same handling of crtl->args.pops_args as in the "return" expander. */ + if (crtl->args.pops_args) + { + rtx popc = GEN_INT (crtl->args.pops_args); + emit_jump_insn (gen_simple_return_pop_internal (popc)); + DONE; + } +}) + +;; Plain "ret"; length 1. +(define_insn "simple_return_internal" + [(simple_return)] + "reload_completed" + "ret" + [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET +;; instruction Athlon and K8 have. + +(define_insn "simple_return_internal_long" + [(simple_return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep%; ret" + [(set_attr "length" "2") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "0") + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + +;; "ret" with a constant operand (operand 0); length 3, of which 2 bytes are +;; the immediate. +(define_insn "simple_return_pop_internal" + [(simple_return) + (use (match_operand:SI 0 "const_int_operand"))] + "reload_completed" + "ret\t%0" + [(set_attr "length" "3") + (set_attr "atom_unit" "jeu") + (set_attr "length_immediate" "2") + (set_attr "modrm" "0")]) + +;; Return by jumping indirectly through SImode register operand 0. +(define_insn "simple_return_indirect_internal" + [(simple_return) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Generate nops. Operand 0 is the number of nops, up to 8. 
+(define_insn "nops" + [(unspec_volatile [(match_operand 0 "const_int_operand")] + UNSPECV_NOPS)] + "reload_completed" +{ + int num = INTVAL (operands[0]); + + gcc_assert (IN_RANGE (num, 1, 8)); + + /* The nops are written straight to asm_out_file, so the returned + template is empty. */ + while (num--) + fputs ("\tnop\n", asm_out_file); + + return ""; +} + [(set (attr "length") (symbol_ref "INTVAL (operands[0])")) + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Pad to 16-byte boundary, max skip in op0. Used to avoid +;; branch prediction penalty for the third jump in a 16-byte +;; block on K8. + +(define_insn "pad" + [(unspec_volatile [(match_operand 0)] UNSPECV_ALIGN)] + "" +{ +#ifdef ASM_OUTPUT_MAX_SKIP_PAD + ASM_OUTPUT_MAX_SKIP_PAD (asm_out_file, 4, (int)INTVAL (operands[0])); +#else + /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. + The align insn is used to avoid 3 jump instructions in a row to improve + branch prediction and the benefits hardly outweigh the cost of extra 8 + nops on the average inserted by full alignment pseudo operation. 
*/ +#endif + return ""; +} + [(set_attr "length" "16")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "ix86_expand_prologue (); DONE;") + +;; 32-bit only; the assembly is produced by output_set_got. +(define_insn "set_got" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "* return output_set_got (operands[0], NULL_RTX);" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +;; Same as set_got, but passes label operand 1 to output_set_got. +(define_insn "set_got_labelled" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(label_ref (match_operand 1))] + UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "* return output_set_got (operands[0], operands[1]);" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] + "TARGET_64BIT" + "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" + [(set_attr "type" "lea") + (set_attr "length_address" "4") + (set_attr "mode" "DI")]) + +(define_insn "set_rip_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(label_ref (match_operand 1))] UNSPEC_SET_RIP))] + "TARGET_64BIT" + "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}" + [(set_attr "type" "lea") + (set_attr "length_address" "4") + (set_attr "mode" "DI")]) + +(define_insn "set_got_offset_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(label_ref (match_operand 1))] + UNSPEC_SET_GOT_OFFSET))] + "TARGET_LP64" + "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}" + [(set_attr "type" "imov") + (set_attr "length_immediate" "0") + (set_attr "length_address" "8") + (set_attr "mode" "DI")]) + +;; The epilogue expanders differ only in the argument they pass to +;; ix86_expand_epilogue: 1 here, 0 for sibcall_epilogue. +(define_expand "epilogue" + [(const_int 0)] + "" + "ix86_expand_epilogue (1); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 0)] + "" + "ix86_expand_epilogue (0); DONE;") + +(define_expand "eh_return" + [(use (match_operand 0 
"register_operand"))] + "" +{ + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; + + /* Tricky bit: we write the address of the handler to which we will + be returning into someone else's stack frame, one word below the + stack address we wish to restore. */ + tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa); + tmp = plus_constant (Pmode, tmp, -UNITS_PER_WORD); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (tmp, ra); + + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +;; Placeholder jump emitted by the eh_return expander; once epilogue_completed +;; it is split into ix86_expand_epilogue (2). +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 0)] + "ix86_expand_epilogue (2); DONE;") + +;; "leave": the stack pointer becomes the frame pointer plus one word (4 or 8 +;; bytes) and the frame pointer is reloaded from the memory it pointed to. +(define_insn "leave" + [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4))) + (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +(define_insn "leave_rex64" + [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8))) + (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +;; Handle -fsplit-stack. + +(define_expand "split_stack_prologue" + [(const_int 0)] + "" +{ + ix86_expand_split_stack_prologue (); + DONE; +}) + +;; In order to support the call/return predictor, we use a return +;; instruction which the middle-end doesn't see. +(define_insn "split_stack_return" + [(unspec_volatile [(match_operand:SI 0 "const_int_operand")] + UNSPECV_SPLIT_STACK_RETURN)] + "" +{ + /* A zero operand gives a plain "ret" (length 1); any other value is + printed as the "ret" operand (length 3). */ + if (operands[0] == const0_rtx) + return "ret"; + else + return "ret\t%0"; +} + [(set_attr "atom_unit" "jeu") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (match_operand:SI 0 "const0_operand") + (const_int 1) + (const_int 3))) + (set (attr "length_immediate") + (if_then_else (match_operand:SI 0 "const0_operand") + (const_int 0) + (const_int 2)))]) + +;; If there are operand 0 bytes available on the stack, jump to +;; operand 1. 
+ +;; The C body computes SP - operand 0 into a fresh Pmode pseudo and branches +;; to operand 1 when that value is GEU the memory word addressed by the +;; UNSPEC_STACK_CHECK constant. It ends with DONE, so the insns it emits +;; are the whole expansion. +(define_expand "split_stack_space_check" + [(set (pc) (if_then_else + (ltu (minus (reg SP_REG) + (match_operand 0 "register_operand")) + (unspec [(const_int 0)] UNSPEC_STACK_CHECK)) + (label_ref (match_operand 1)) + (pc)))] + "" +{ + rtx reg, size, limit; + + reg = gen_reg_rtx (Pmode); + size = force_reg (Pmode, operands[0]); + emit_insn (gen_sub3_insn (reg, stack_pointer_rtx, size)); + limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_STACK_CHECK); + limit = gen_rtx_MEM (Pmode, gen_rtx_CONST (Pmode, limit)); + ix86_expand_branch (GEU, reg, limit, operands[1]); + + DONE; +}) + +;; Bit manipulation instructions. + +;; ffs is built from ctz: operand 2 is a fresh register preloaded with -1, +;; which replaces the ctz result when the flags compare of the input with +;; zero reports equality; 1 is then added. SImode without cmov is handed +;; to ffssi2_no_cmove instead. The flags mode is CCC with TARGET_BMI and +;; CCZ otherwise. +(define_expand "ffs<mode>2" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (match_dup 3) (match_dup 4)) + (set (match_operand:SWI48 0 "register_operand") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand")))]) + (set (match_dup 0) (if_then_else:SWI48 + (eq (match_dup 3) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + enum machine_mode flags_mode; + + if (<MODE>mode == SImode && !TARGET_CMOVE) + { + emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1])); + DONE; + } + + flags_mode = TARGET_BMI ?
CCCmode : CCZmode; + + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG); + operands[4] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx); +}) + +;; SImode ffs for targets without cmov. Split after reload into: ctz with +;; a flags compare, a store of the "flags == 0" test into the low byte of +;; scratch operand 2 (zeroed beforehand by ix86_expand_clear), negation of +;; the scratch, IOR of the scratch into the result, and a final add of 1. +(define_insn_and_split "ffssi2_no_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_CMOVE" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 4) (match_dup 5)) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (match_dup 4) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] +{ + enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode; + + operands[3] = gen_lowpart (QImode, operands[2]); + operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG); + operands[5] = gen_rtx_COMPARE (flags_mode, operands[1], const0_rtx); + + ix86_expand_clear (operands[2]); +}) + +;; tzcnt (TARGET_BMI only): ctz plus a CCCmode compare of the input with 0. +(define_insn "*tzcnt<mode>_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 (match_dup 1)))] + "TARGET_BMI" + "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; bsf: ctz plus a CCZmode compare of the input with 0; no target test. +(define_insn "*bsf<mode>_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 (match_dup 1)))] + "" + "bsf{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1")
+ (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; ctz with FLAGS_REG clobbered. Prints tzcnt when TARGET_BMI, "rep bsf" +;; when TARGET_GENERIC and not optimizing for size, and plain bsf otherwise; +;; the prefix_rep attribute is computed from the same conditions. +(define_insn "ctz<mode>2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + else if (optimize_function_for_size_p (cfun)) + ; + else if (TARGET_GENERIC) + /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */ + return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; + + return "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set (attr "prefix_rep") + (if_then_else + (ior (match_test "TARGET_BMI") + (and (not (match_test "optimize_function_for_size_p (cfun)")) + (match_test "TARGET_GENERIC"))) + (const_string "1") + (const_string "0"))) + (set_attr "mode" "<MODE>")]) + +;; With TARGET_LZCNT the expander emits clz<mode>2_lzcnt and is done. +;; Otherwise the template is used with operand 2 = bitsize - 1: first +;; operand 2 - clz (operand 1), then an XOR of that result with operand 2. +(define_expand "clz<mode>2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (minus:SWI248 + (match_dup 2) + (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_LZCNT) + { + emit_insn (gen_clz<mode>2_lzcnt (operands[0], operands[1])); + DONE; + } + operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); +}) + +;; lzcnt instruction, available only with TARGET_LZCNT. +(define_insn "clz<mode>2_lzcnt" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT" + "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; BMI instructions.
+;; andn: operand 0 = ~operand 1 & operand 2. +(define_insn "*bmi_andn_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r,r") + (and:SWI48 + (not:SWI48 + (match_operand:SWI48 1 "register_operand" "r,r")) + (match_operand:SWI48 2 "nonimmediate_operand" "r,m"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "andn\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "direct, double") + (set_attr "mode" "<MODE>")]) + +;; bextr, modelled as the opaque UNSPEC_BEXTR. Operand 1 (the only one that +;; may be memory) and register operand 2 are its inputs; their meaning is +;; not expressed in the RTL. +(define_insn "bmi_bextr_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r,r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m") + (match_operand:SWI48 2 "register_operand" "r,r")] + UNSPEC_BEXTR)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "bextr\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "direct, double") + (set_attr "mode" "<MODE>")]) + +;; blsi: operand 0 = -operand 1 & operand 1. +(define_insn "*bmi_blsi_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (neg:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsi\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; blsmsk: operand 0 = (operand 1 - 1) ^ operand 1. +(define_insn "*bmi_blsmsk_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; blsr: operand 0 = (operand 1 - 1) & operand 1. +(define_insn "*bmi_blsr_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI" + "blsr\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "btver2_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; BMI2 instructions.
+;; bzhi, written in RTL as (-1 >> operand 2) & operand 1, with operand 2 +;; restricted to a register. +(define_insn "bmi2_bzhi_<mode>3" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 (lshiftrt:SWI48 (const_int -1) + (match_operand:SWI48 2 "register_operand" "r")) + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2" + "bzhi\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "<MODE>")]) + +;; pdep, modelled as the opaque UNSPEC_PDEP. Unlike bzhi there is no +;; FLAGS_REG clobber. +(define_insn "bmi2_pdep_<mode>3" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r") + (match_operand:SWI48 2 "nonimmediate_operand" "rm")] + UNSPEC_PDEP))] + "TARGET_BMI2" + "pdep\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "<MODE>")]) + +;; pext, modelled as the opaque UNSPEC_PEXT; same operand layout as pdep +;; and likewise no FLAGS_REG clobber. +(define_insn "bmi2_pext_<mode>3" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r") + (match_operand:SWI48 2 "nonimmediate_operand" "rm")] + UNSPEC_PEXT))] + "TARGET_BMI2" + "pext\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "<MODE>")]) + +;; TBM instructions.
+;; bextr with an immediate control word. The RTL is a zero_extract with +;; constant operands 2 and 3 (each 0..255); at output time they are packed +;; as (operand 2 << 8) | operand 3 and stored back into operands[2], which +;; is what gets printed. +(define_insn "tbm_bextri_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (zero_extract:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "const_0_to_255_operand" "n") + (match_operand:SWI48 3 "const_0_to_255_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) << 8 | INTVAL (operands[3])); + return "bextr\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blcfill: operand 0 = (operand 1 + 1) & operand 1. +(define_insn "*tbm_blcfill_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blci: operand 0 = ~(operand 1 + 1) | operand 1. +(define_insn "*tbm_blci_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (not:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1))) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blci\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blcic: operand 0 = (operand 1 + 1) & ~operand 1. +(define_insn "*tbm_blcic_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blcmsk: operand 0 = (operand 1 + 1) ^ operand 1. +(define_insn "*tbm_blcmsk_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (xor:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blcs: operand 0 = (operand 1 + 1) | operand 1. +(define_insn "*tbm_blcs_<mode>" + [(set (match_operand:SWI48 0
"register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blcs\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blsfill: operand 0 = (operand 1 - 1) | operand 1. +(define_insn "*tbm_blsfill_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blsfill\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; blsic: operand 0 = (operand 1 - 1) | ~operand 1. +(define_insn "*tbm_blsic_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "blsic\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; t1mskc: operand 0 = (operand 1 + 1) | ~operand 1. +(define_insn "*tbm_t1mskc_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ior:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int 1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "t1mskc\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; tzmsk: operand 0 = (operand 1 - 1) & ~operand 1. +(define_insn "*tbm_tzmsk_<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (plus:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (const_int -1)) + (not:SWI48 + (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_TBM" + "tzmsk\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; The bsr patterns describe the instruction result as (bitsize - 1) - clz: +;; 63 for DImode (64-bit targets only), 31 for SImode and 15 for HImode. +(define_insn "bsr_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsr{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr
"prefix_0f" "1") + (set_attr "mode" "DI")]) + +(define_insn "bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "SI")]) + +(define_insn "*bsrhi" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "mode" "HI")]) + +;; popcnt (TARGET_POPCNT). When TARGET_MACHO is set at build time the +;; mnemonic is printed without a size suffix. +(define_insn "popcount<mode>2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; popcnt whose flags result is also used: FLAGS_REG is set from comparing +;; the popcount with 0. Only matches when ix86_match_ccmode accepts CCZmode. +(define_insn "*popcount<mode>2_cmp" + [(set (reg FLAGS_REG) + (compare + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 (match_dup 1)))] + "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +;; Same as the pattern above for an SImode popcount whose result is +;; zero-extended into a DImode register; 64-bit targets only. +(define_insn "*popcountsi2_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI(popcount:SI (match_dup 1))))] + "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO +
return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{l}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +;; DImode byte swap, 64-bit targets only. Without TARGET_MOVBE the source +;; is forced into a register. +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand") + (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))] + "TARGET_64BIT" +{ + if (!TARGET_MOVBE) + operands[1] = force_reg (DImode, operands[1]); +}) + +;; SImode byte swap. With TARGET_MOVBE the operands are used as given; +;; with TARGET_BSWAP the source is forced into a register. Otherwise the +;; swap is synthesized in operand 0: copy the source, swap the bytes of the +;; low halfword, rotate left by 16, swap the low halfword bytes again. +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand") + (bswap:SI (match_operand:SI 1 "nonimmediate_operand")))] + "" +{ + if (TARGET_MOVBE) + ; + else if (TARGET_BSWAP) + operands[1] = force_reg (SImode, operands[1]); + else + { + rtx x = operands[0]; + + emit_move_insn (x, operands[1]); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + emit_insn (gen_rotlsi3 (x, x, GEN_INT (16))); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + DONE; + } +}) + +;; TARGET_MOVBE: bswap in place (alternative 0) or movbe as a swapping load +;; or store (alternatives 1 and 2). Memory-to-memory is rejected by the +;; insn condition. +(define_insn "*bswap<mode>2_movbe" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m") + (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))] + "TARGET_MOVBE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + bswap\t%0 + movbe\t{%1, %0|%0, %1} + movbe\t{%1, %0|%0, %1}" + [(set_attr "type" "bitmanip,imov,imov") + (set_attr "modrm" "0,1,1") + (set_attr "prefix_0f" "*,1,1") + (set_attr "prefix_extra" "*,1,1") + (set_attr "mode" "<MODE>")]) + +;; Register-only bswap in place; requires TARGET_BSWAP. +(define_insn "*bswap<mode>2" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))] + "TARGET_BSWAP" + "bswap\t%0" + [(set_attr "type" "bitmanip") + (set_attr "modrm" "0") + (set_attr "mode" "<MODE>")]) + +;; Byte swap of the low halfword of a register, leaving the rest untouched +;; (strict_low_part). Used when TARGET_USE_XCHGB or optimizing for size: +;; xchgb of the two low bytes for "Q" registers, rolw $8 otherwise. +(define_insn "*bswaphi_lowpart_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)" + "@ + xchg{b}\t{%h0, %b0|%b0, %h0} + rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "2,4") + (set_attr
"mode" "QI,HI")]) + +;; Unconditional form of the low-halfword byte swap: always rolw $8. +(define_insn "bswaphi_lowpart" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "4") + (set_attr "mode" "HI")]) + +;; DImode parity when popcnt is not available. Emits paritydi2_cmp to set +;; the flags, stores the ORDERED test of the flags in a QImode scratch and +;; zero-extends it to DImode (via an SImode temporary on 32-bit targets). +(define_expand "paritydi2" + [(set (match_operand:DI 0 "register_operand") + (parity:DI (match_operand:DI 1 "register_operand")))] + "! TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX, + NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + if (TARGET_64BIT) + emit_insn (gen_zero_extendqidi2 (operands[0], scratch)); + else + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (tmp, scratch)); + emit_insn (gen_zero_extendsidi2 (operands[0], tmp)); + } + DONE; +}) + +;; SImode parity when popcnt is not available; same scheme as paritydi2 +;; using paritysi2_cmp and a single zero extension. +(define_expand "paritysi2" + [(set (match_operand:SI 0 "register_operand") + (parity:SI (match_operand:SI 1 "register_operand")))] + "! TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + emit_insn (gen_zero_extendqisi2 (operands[0], scratch)); + DONE; +}) + +;; Sets FLAGS_REG to UNSPEC_PARITY of a DImode register. After reload it +;; is reduced to the SImode form by XORing the two 32-bit halves into +;; operand 1. On 64-bit targets the low half is first copied to scratch +;; operand 1 and the input shifted right by 32; on 32-bit targets operand 1 +;; is replaced by the high-part register of the input. +(define_insn_and_split "paritydi2_cmp" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:DI 3 "register_operand" "0")] + UNSPEC_PARITY)) + (clobber (match_scratch:DI 0 "=r")) + (clobber (match_scratch:SI 1 "=&r")) + (clobber (match_scratch:HI 2 "=Q"))] + "!
TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 1)] UNSPEC_PARITY)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] +{ + operands[4] = gen_lowpart (SImode, operands[3]); + + if (TARGET_64BIT) + { + emit_move_insn (operands[1], gen_lowpart (SImode, operands[3])); + emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32))); + } + else + operands[1] = gen_highpart (SImode, operands[3]); +}) + +;; SImode step of the parity reduction. After reload the low halfword is +;; copied to HImode scratch operand 1, the input is shifted right by 16, +;; and the two halfwords are XORed before the HImode UNSPEC_PARITY set. +(define_insn_and_split "paritysi2_cmp" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:SI 2 "register_operand" "0")] + UNSPEC_PARITY)) + (clobber (match_scratch:SI 0 "=r")) + (clobber (match_scratch:HI 1 "=&Q"))] + "! TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:HI (match_dup 1) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 1)] UNSPEC_PARITY)) + (clobber (match_dup 1))])] +{ + operands[3] = gen_lowpart (HImode, operands[2]); + + emit_move_insn (operands[1], gen_lowpart (HImode, operands[2])); + emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16))); +}) + +;; Final HImode step: a single xorb of the high and low byte of a "Q" +;; register sets FLAGS_REG to UNSPEC_PARITY of the halfword. +(define_insn "*parityhi2_cmp" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:HI 1 "register_operand" "0")] + UNSPEC_PARITY)) + (clobber (match_scratch:HI 0 "=Q"))] + "! TARGET_POPCNT" + "xor{b}\t{%h0, %b0|%b0, %h0}" + [(set_attr "length" "2") + (set_attr "mode" "HI")]) + + +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation.
+ +;; 32-bit GNU TLS global-dynamic sequence: an lea of sym@tlsgd relative to +;; operand 1 (constraint "b") followed by the call. With TARGET_SUN_TLS the +;; call is spelled sym@tlsgdplt or callee@plt depending on whether +;; HAVE_AS_IX86_TLSGDPLT is defined. The result is in "a"; "d", "c" +;; scratches and the flags are clobbered. +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI + [(match_operand:SI 1 "register_operand" "b") + (match_operand 2 "tls_symbolic_operand") + (match_operand 3 "constant_call_address_operand" "z")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" +{ + output_asm_insn + ("lea{l}\t{%E2@tlsgd(,%1,1), %0|%0, %E2@tlsgd[%1*1]}", operands); + if (TARGET_SUN_TLS) +#ifdef HAVE_AS_IX86_TLSGDPLT + return "call\t%a2@tlsgdplt"; +#else + return "call\t%p3@plt"; +#endif + return "call\t%P3"; +} + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +;; Expander producing the UNSPEC_TLS_GD parallel. Note that the operand +;; numbers of the register and the symbol are swapped relative to the insn +;; above: here the symbol is operand 1 and the register is operand 2. +(define_expand "tls_global_dynamic_32" + [(parallel + [(set (match_operand:SI 0 "register_operand") + (unspec:SI [(match_operand:SI 2 "register_operand") + (match_operand 1 "tls_symbolic_operand") + (match_operand 3 "constant_call_address_operand")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4)) + (clobber (match_scratch:SI 5)) + (clobber (reg:CC FLAGS_REG))])]) + +;; 64-bit global-dynamic call sequence. The padding around the lea (a 0x66 +;; byte, a 0x6666 short and a rex64 prefix) is written straight to +;; asm_out_file. The leading 0x66 byte is omitted for TARGET_X32, matching +;; the length attribute of 15 there versus 16 otherwise. +(define_insn "*tls_global_dynamic_64_<mode>" + [(set (match_operand:P 0 "register_operand" "=a") + (call:P + (mem:QI (match_operand 2 "constant_call_address_operand" "z")) + (match_operand 3))) + (unspec:P [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" +{ + if (!TARGET_X32) + fputs (ASM_BYTE "0x66\n", asm_out_file); + output_asm_insn + ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + fputs (ASM_SHORT "0x6666\n", asm_out_file); + fputs ("\trex64\n", asm_out_file); + if (TARGET_SUN_TLS) + return "call\t%p2@plt"; + return "call\t%P2"; +} + [(set_attr "type" "multi") + (set (attr "length") + (symbol_ref "TARGET_X32 ?
15 : 16"))]) + +;; Large PIC model variant of the 64-bit global-dynamic call. The callee +;; address is register operand 2 plus operand 3, which the insn condition +;; requires to be a CONST wrapping an UNSPEC_PLTOFF. The address is built +;; in %rax with movabs + add and called indirectly. +(define_insn "*tls_global_dynamic_64_largepic" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI + (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b") + (match_operand:DI 3 "immediate_operand" "i"))) + (match_operand 4))) + (unspec:DI [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_GD)] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[3]) == CONST + && GET_CODE (XEXP (operands[3], 0)) == UNSPEC + && XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF" +{ + output_asm_insn + ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands); + output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands); + output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); + return "call\t{*%%rax|rax}"; +} + [(set_attr "type" "multi") + (set_attr "length" "22")]) + +;; Expander for the 64-bit global-dynamic call; the callee (operand 2) has +;; no predicate here, so it can produce either insn form above. +(define_expand "tls_global_dynamic_64_<mode>" + [(parallel + [(set (match_operand:P 0 "register_operand") + (call:P + (mem:QI (match_operand 2)) + (const_int 0))) + (unspec:P [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_GD)])] + "TARGET_64BIT") + +;; 32-bit GNU TLS local-dynamic base sequence: lea of %&@tlsldm relative to +;; operand 1 followed by the call. Unlike the global-dynamic insn, the +;; Sun-specific choice tests HAVE_AS_IX86_TLSLDMPLT in an ordinary "if". +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI + [(match_operand:SI 1 "register_operand" "b") + (match_operand 2 "constant_call_address_operand" "z")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" +{ + output_asm_insn + ("lea{l}\t{%&@tlsldm(%1), %0|%0, %&@tlsldm[%1]}", operands); + if (TARGET_SUN_TLS) + { + if (HAVE_AS_IX86_TLSLDMPLT) + return "call\t%&@tlsldmplt"; + else + return "call\t%p2@plt"; + } + return "call\t%P2"; +} + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +;; Expander producing the UNSPEC_TLS_LD_BASE parallel with its two scratch +;; clobbers and the flags clobber. +(define_expand "tls_local_dynamic_base_32" + [(parallel + [(set (match_operand:SI 0 "register_operand") + (unspec:SI + [(match_operand:SI 1 "register_operand") + (match_operand 2
"constant_call_address_operand")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3)) + (clobber (match_scratch:SI 4)) + (clobber (reg:CC FLAGS_REG))])]) + +;; 64-bit local-dynamic base call: lea of %&@tlsld(%rip) into %rdi and a +;; direct call; no padding prefixes are emitted here. +(define_insn "*tls_local_dynamic_base_64_<mode>" + [(set (match_operand:P 0 "register_operand" "=a") + (call:P + (mem:QI (match_operand 1 "constant_call_address_operand" "z")) + (match_operand 2))) + (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT" +{ + output_asm_insn + ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + if (TARGET_SUN_TLS) + return "call\t%p1@plt"; + return "call\t%P1"; +} + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +;; Large PIC model variant of the 64-bit local-dynamic base call: the +;; callee address (operand 1 plus the PLTOFF constant operand 2) is built +;; in %rax with movabs + add and called indirectly. +(define_insn "*tls_local_dynamic_base_64_largepic" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI + (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "immediate_operand" "i"))) + (match_operand 3))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF + && GET_CODE (operands[2]) == CONST + && GET_CODE (XEXP (operands[2], 0)) == UNSPEC + && XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF" +{ + output_asm_insn + ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands); + output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands); + output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); + return "call\t{*%%rax|rax}"; +} + [(set_attr "type" "multi") + (set_attr "length" "22")]) + +;; Expander for the 64-bit local-dynamic base call; the callee (operand 1) +;; has no predicate here. +(define_expand "tls_local_dynamic_base_64_<mode>" + [(parallel + [(set (match_operand:P 0 "register_operand") + (call:P + (mem:QI (match_operand 1)) + (const_int 0))) + (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + "TARGET_64BIT") + +;; Local dynamic of a single variable is a lose. Show combine how +;; to convert that back to global dynamic.
+ +;; Matches a local-dynamic base plus a DTPOFF offset for operand 3 and +;; splits it (the split condition is empty) into the UNSPEC_TLS_GD parallel +;; for the same symbol, reusing the two scratch operands. +(define_insn_and_split "*tls_local_dynamic_32_once" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand 2 "constant_call_address_operand" "z")] + UNSPEC_TLS_LD_BASE) + (const:SI (unspec:SI + [(match_operand 3 "tls_symbolic_operand")] + UNSPEC_DTPOFF)))) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "" + [(parallel + [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] + UNSPEC_TLS_GD)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC FLAGS_REG))])]) + +;; Segment register for the thread base ptr load +(define_mode_attr tp_seg [(SI "gs") (DI "fs")]) + +;; Load and add the thread base pointer from %<tp_seg>:0. +;; The x32 variants below always use %fs with a 32-bit access; the _zext +;; forms write the zero-extended value to a DImode register (printed with +;; the %k modifier). +(define_insn "*load_tp_x32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "TARGET_X32" + "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*load_tp_x32_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))] + "TARGET_X32" + "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +;; Non-x32 load of the thread pointer; segment register, opcode suffix and +;; pointer size keyword are selected per mode through <tp_seg>, +;; <imodesuffix> and <iptrsize>. +(define_insn "*load_tp_<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(const_int 0)] UNSPEC_TP))] + "!TARGET_X32" + "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +;; Add the thread pointer to operand 1, which is tied to the destination. +;; Same x32 / x32 zero-extending / generic split as the load patterns. +(define_insn "*add_tp_x32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) +
(match_operand:SI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_X32" + "add{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_x32_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand:SI 1 "register_operand" "0")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_X32" + "add{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_<mode>" + [(set (match_operand:P 0 "register_operand" "=r") + (plus:P (unspec:P [(const_int 0)] UNSPEC_TP) + (match_operand:P 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_X32" + "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +;; The Sun linker took the AMD64 TLS spec literally and can only handle +;; %rax as destination of the initial executable code sequence. +;; Hence the "=a" constraint: the insn loads %fs:0 into operand 0 and then +;; adds sym@gottpoff(%rip) to it. +(define_insn "tls_initial_exec_64_sun" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec:DI + [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_IE_SUN)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_SUN_TLS" +{ + output_asm_insn + ("mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}", operands); + return "add{q}\t{%a1@gottpoff(%%rip), %0|%0, %a1@gottpoff[rip]}"; +} + [(set_attr "type" "multi")]) + +;; GNU2 TLS patterns can be split.
+ +;; Two-step expansion: operand 3 = operand 2 + TLSDESC (operand 1), then +;; the UNSPEC_TLSDESC call parallel. Operand 3 is a fresh pseudo when one +;; can be created, otherwise operand 0. The expander also sets +;; ix86_tls_descriptor_calls_expanded_in_cfun. +(define_expand "tls_dynamic_gnu2_32" + [(set (match_dup 3) + (plus:SI (match_operand:SI 2 "register_operand") + (const:SI + (unspec:SI [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLSDESC)))) + (parallel + [(set (match_operand:SI 0 "register_operand") + (unspec:SI [(match_dup 1) (match_dup 3) + (match_dup 2) (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[3] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +;; The lea step: operand 0 = operand 1 (constraint "b") + sym@TLSDESC. +(define_insn "*tls_dynamic_gnu2_lea_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "b") + (const:SI + (unspec:SI [(match_operand 2 "tls_symbolic_operand")] + UNSPEC_TLSDESC))))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "lea{l}\t{%E2@TLSDESC(%1), %0|%0, %E2@TLSDESC[%1]}" + [(set_attr "type" "lea") + (set_attr "mode" "SI") + (set_attr "length" "6") + (set_attr "length_address" "4")]) + +;; The call step: an indirect call through sym@TLSCALL(operand 2), where +;; operand 2 is tied to the "a" result register. +(define_insn "*tls_dynamic_gnu2_call_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand 1 "tls_symbolic_operand") + (match_operand:SI 2 "register_operand" "0") + ;; we have to make sure %ebx still points to the GOT + (match_operand:SI 3 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +;; Matches a TLSDESC call on a tls_modbase operand plus a DTPOFF offset for +;; operand 1 and splits it (empty split condition) into one gnu2 sequence +;; for operand 1, followed by a move of its result into operand 0. +(define_insn_and_split "*tls_dynamic_gnu2_combine_32" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI + (unspec:SI [(match_operand 3 "tls_modbase_operand") + (match_operand:SI 4) + (match_operand:SI 2 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC) + (const:SI (unspec:SI + [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT &&
TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 5))] +{ + operands[5] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; + emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); +}) + +;; 64-bit version of the two-step gnu2 expansion: operand 2 = TLSDESC +;; (operand 1), then the UNSPEC_TLSDESC call parallel. There is no GOT +;; register operand here. +(define_expand "tls_dynamic_gnu2_64" + [(set (match_dup 2) + (unspec:DI [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLSDESC)) + (parallel + [(set (match_operand:DI 0 "register_operand") + (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[2] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +;; The 64-bit lea step: operand 0 = sym@TLSDESC(%rip). +(define_insn "*tls_dynamic_gnu2_lea_64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLSDESC))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "lea{q}\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}" + [(set_attr "type" "lea") + (set_attr "mode" "DI") + (set_attr "length" "7") + (set_attr "length_address" "4")]) + +;; The 64-bit call step: an indirect call through sym@TLSCALL(operand 2), +;; with operand 2 tied to the "a" result register. +(define_insn "*tls_dynamic_gnu2_call_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec:DI [(match_operand 1 "tls_symbolic_operand") + (match_operand:DI 2 "register_operand" "0") + (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +;; 64-bit counterpart of the combine pattern: a TLSDESC call on a +;; tls_modbase operand plus a DTPOFF offset for operand 1 is split into one +;; gnu2 sequence for operand 1 and a move of its result into operand 0. +(define_insn_and_split "*tls_dynamic_gnu2_combine_64" + [(set (match_operand:DI 0 "register_operand" "=&a") + (plus:DI + (unspec:DI [(match_operand 2 "tls_modbase_operand") + (match_operand:DI 3) + (reg:DI SP_REG)] + UNSPEC_TLSDESC) + (const:DI (unspec:DI + [(match_operand 1 "tls_symbolic_operand")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 4))] +{ + operands[4]
= can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; + emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1])); +}) + +;; These patterns match the binary 387 instructions for addM3, subM3, +;; mulM3 and divM3. There are three patterns for each of DFmode and +;; SFmode. The first is the normal insn, the second the same insn but +;; with one operand a conversion, and the third the same insn but with +;; the other operand a conversion. The conversion may be SFmode or +;; SImode if the target mode DFmode, but only SImode if the target mode +;; is SFmode. + +;; Gcc is slightly more smart about handling normal two address instructions +;; so use special patterns for add and mull. + +;; Commutative operators when SSE and x87 math are mixed +;; (TARGET_MIX_SSE_I387): alternative 0 uses x87 registers, alternative 1 +;; is SSE without AVX and alternative 2 is AVX. The "type" attribute is +;; chosen from the alternative and from whether operator 3 is a multiply. +(define_insn "*fop_<mode>_comm_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm,xm")]))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1,2") + (if_then_else (match_operand:MODEF 3 "mult_operator") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator") + (const_string "fmul") + (const_string "fop")))) + (set_attr "isa" "*,noavx,avx") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "<MODE>")]) + +;; Commutative operators with SSE math only (TARGET_SSE_MATH); alternative +;; 0 is the non-AVX form, alternative 1 the AVX form. +(define_insn "*fop_<mode>_comm_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x,v") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,v") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")]))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn,
operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "<MODE>")]) + +;; Commutative operators on the x87, when X87_ENABLE_ARITH allows the mode. +(define_insn "*fop_<mode>_comm_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode) + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "<MODE>")]) + +;; Non-commutative operators with mixed SSE/x87 math. The two x87 +;; alternatives allow either input to be tied to the destination; the SSE +;; alternatives are the non-AVX and AVX forms. The "type" attribute also +;; distinguishes divides (ssediv / fdiv). +(define_insn "*fop_<mode>_1_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm,xm")]))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2,3") + (match_operand:MODEF 3 "mult_operator")) + (const_string "ssemul") + (and (eq_attr "alternative" "2,3") + (match_operand:MODEF 3 "div_operator")) + (const_string "ssediv") + (eq_attr "alternative" "2,3") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "isa" "*,*,noavx,avx") + (set_attr "prefix" "orig,orig,orig,vex") + (set_attr "mode" "<MODE>")]) + +;; Scalar rcpss, modelled as UNSPEC_RCP; requires TARGET_SSE_MATH. +(define_insn "*rcpsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF
[(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP))] + "TARGET_SSE_MATH" + "%vrcpss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") + (set_attr "btver2_sse_attr" "rcp") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +;; Non-commutative operators with SSE math only. Operand 1 must be a +;; register, so no memory-memory check is needed in the condition. +(define_insn "*fop_<mode>_1_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "<MODE>")]) + +;; This pattern is not fully shadowed by the pattern above. +;; Non-commutative operators on the x87; explicitly excluded when the mode +;; is handled by SSE math. +(define_insn "*fop_<mode>_1_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && X87_ENABLE_ARITH (<MODE>mode) + && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "<MODE>")]) + +;; ??? Add SSE splitters for these!
+;; x87 operation whose first source is an integer (SWI24) memory operand
+;; converted with float:MODEF.  fp_int_src is set and the "mode"
+;; attribute is the integer mode.
+(define_insn "*fop_<MODEF:mode>_2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (match_operator:MODEF 3 "binary_fp_operator"
+          [(float:MODEF
+             (match_operand:SWI24 1 "nonimmediate_operand" "m"))
+           (match_operand:MODEF 2 "register_operand" "0")]))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode)
+   && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
+       || optimize_function_for_size_p (cfun))"
+  { return output_387_binary_op (insn, operands); }
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<SWI24:MODE>")])
+
+;; Same as the _2 pattern above with the integer memory operand as the
+;; second source.  Two mode iterators (MODEF and SWI24) are active here,
+;; so the "mode" attribute names its iterator explicitly, exactly as the
+;; _2 pattern does; a bare <MODE> would leave the choice of iterator to
+;; the md reader.
+(define_insn "*fop_<MODEF:mode>_3_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (match_operator:MODEF 3 "binary_fp_operator"
+          [(match_operand:MODEF 1 "register_operand" "0")
+           (float:MODEF
+             (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
+  "TARGET_80387 && X87_ENABLE_FLOAT (<MODEF:MODE>mode, <SWI24:MODE>mode)
+   && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+   && (TARGET_USE_<SWI24:MODE>MODE_FIOP
+       || optimize_function_for_size_p (cfun))"
+  { return output_387_binary_op (insn, operands); }
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:MODEF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<SWI24:MODE>")])
+
+;; DFmode operation whose first source is an SFmode value widened with
+;; float_extend.  The "mode" attribute is SF.
+(define_insn "*fop_df_4_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+        (match_operator:DF 3 "binary_fp_operator"
+          [(float_extend:DF
+             (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+           (match_operand:DF 2 "register_operand" "0,f")]))]
+  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:DF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:DF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "SF")])
+
+;; DFmode operation whose second source is a widened SFmode value.
+(define_insn "*fop_df_5_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+        (match_operator:DF 3 "binary_fp_operator"
+          [(match_operand:DF 1 "register_operand" "0,f")
+           (float_extend:DF
+             (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:DF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:DF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "SF")])
+
+;; DFmode operation with both sources widened from SFmode.
+(define_insn "*fop_df_6_i387"
+  [(set (match_operand:DF 0 "register_operand" "=f,f")
+        (match_operator:DF 3 "binary_fp_operator"
+          [(float_extend:DF
+             (match_operand:SF 1 "register_operand" "0,f"))
+           (float_extend:DF
+             (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387 && X87_ENABLE_ARITH (DFmode)
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:DF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:DF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "SF")])
+
+;; XFmode commutative operator; both sources are "f" registers.
+(define_insn "*fop_xf_comm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(match_operand:XF 1 "register_operand" "%0")
+           (match_operand:XF 2 "register_operand" "f")]))]
+  "TARGET_80387
+   && COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (if_then_else (match_operand:XF 3 "mult_operator")
+           (const_string "fmul")
+           (const_string "fop")))
+   (set_attr "mode" "XF")])
+
+;; XFmode non-commutative operator; either source may be tied to the
+;; destination.
+(define_insn "*fop_xf_1_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(match_operand:XF 1 "register_operand" "0,f")
+           (match_operand:XF 2 "register_operand" "f,0")]))]
+  "TARGET_80387
+   && !COMMUTATIVE_ARITH_P (operands[3])"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "XF")])
+
+;; XFmode operation whose first source is an integer (SWI24) memory
+;; operand.  SWI24 is the only iterator here, so <MODE> is unambiguous.
+(define_insn "*fop_xf_2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(float:XF
+             (match_operand:SWI24 1 "nonimmediate_operand" "m"))
+           (match_operand:XF 2 "register_operand" "0")]))]
+  "TARGET_80387
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  { return output_387_binary_op (insn, operands); }
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+;; XFmode operation whose second source is an integer (SWI24) memory
+;; operand.
+(define_insn "*fop_xf_3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(match_operand:XF 1 "register_operand" "0")
+           (float:XF
+             (match_operand:SWI24 2 "nonimmediate_operand" "m"))]))]
+  "TARGET_80387
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
+  { return output_387_binary_op (insn, operands); }
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
+;; XFmode operation whose first source is a MODEF value widened with
+;; float_extend.
+(define_insn "*fop_xf_4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(float_extend:XF
+             (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
+           (match_operand:XF 2 "register_operand" "0,f")]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+;; XFmode operation whose second source is a widened MODEF value.
+(define_insn "*fop_xf_5_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(match_operand:XF 1 "register_operand" "0,f")
+           (float_extend:XF
+             (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+;; XFmode operation with both sources widened from MODEF.
+(define_insn "*fop_xf_6_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (match_operator:XF 3 "binary_fp_operator"
+          [(float_extend:XF
+             (match_operand:MODEF 1 "register_operand" "0,f"))
+           (float_extend:XF
+             (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
+  "TARGET_80387"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (cond [(match_operand:XF 3 "mult_operator")
+                 (const_string "fmul")
+               (match_operand:XF 3 "div_operator")
+                 (const_string "fdiv")
+              ]
+              (const_string "fop")))
+   (set_attr "mode" "<MODE>")])
+
+;; FPU special functions.
+
+;; This pattern implements a no-op XFmode truncation for
+;; all fancy i386 XFmode math functions.
+
+(define_insn "truncxf<mode>2_i387_noop_unspec"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_TRUNC_NOOP))]
+  "TARGET_USE_FANCY_MATH_387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+;; XFmode square root computed in place (operand 1 is tied to operand 0),
+;; emitted as fsqrt.
+(define_insn "sqrtxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (sqrt:XF (match_operand:XF 1 "register_operand" "0")))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fsqrt"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")])
+
+;; fsqrt applied to a MODEF register that the RTL treats as extended to
+;; XFmode; the result is XFmode.
+(define_insn "sqrt_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (sqrt:XF
+          (float_extend:XF
+            (match_operand:MODEF 1 "register_operand" "0"))))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fsqrt"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "direct")
+   (set_attr "bdver1_decode" "direct")])
+
+;; SFmode UNSPEC_RSQRT on an "x" register, output through the rsqrtss
+;; template below.
+(define_insn "*rsqrtsf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+        (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+                   UNSPEC_RSQRT))]
+  "TARGET_SSE_MATH"
+  "%vrsqrtss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "rcp")
+   (set_attr "btver2_sse_attr" "rcp")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
+
+;; Named rsqrtsf2 expander.  The preparation code always hands the
+;; operands to ix86_emit_swsqrtsf (last argument 1) and ends with DONE.
+(define_expand "rsqrtsf2"
+  [(set (match_operand:SF 0 "register_operand")
+        (unspec:SF [(match_operand:SF 1 "nonimmediate_operand")]
+                   UNSPEC_RSQRT))]
+  "TARGET_SSE_MATH"
+{
+  ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
+  DONE;
+})
+
+;; MODEF square root on an "x" register under TARGET_SSE_MATH.
+(define_insn "*sqrt<mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+        (sqrt:MODEF
+          (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "%vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "sqrt")
+   (set_attr "btver2_sse_attr" "sqrt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "*")
+   (set_attr "amdfam10_decode" "*")
+   (set_attr "bdver1_decode" "*")])
+
+;; MODEF square root expander.  It picks one of three routes: the
+;; ix86_emit_swsqrtsf expansion, the i387 XFmode sequence, or (when
+;; neither branch below runs DONE) the sqrt RTL of the template itself.
+(define_expand "sqrt<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (sqrt:MODEF
+          (match_operand:MODEF 1 "nonimmediate_operand")))]
+  "(TARGET_USE_FANCY_MATH_387 && X87_ENABLE_ARITH (<MODE>mode))
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  /* SFmode with TARGET_RECIP_SQRT, not optimizing for size, and the
+     relaxed math flags below: expand through ix86_emit_swsqrtsf
+     (last argument 0).  */
+  if (<MODE>mode == SFmode
+      && TARGET_SSE_MATH
+      && TARGET_RECIP_SQRT
+      && !optimize_function_for_size_p (cfun)
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
+      DONE;
+    }
+
+  /* SSE math does not handle this mode: take the square root in XFmode
+     with the i387 pattern, then move the result back with the no-op
+     truncation.  */
+  if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = force_reg (<MODE>mode, operands[1]);
+
+      emit_insn (gen_sqrt_extend<mode>xf2_i387 (op0, op1));
+      emit_insn (gen_truncxf<mode>2_i387_noop_unspec (operands[0], op0));
+      DONE;
+   }
+})
+
+;; fprem.  Operands 2 and 3 are the inputs, tied to outputs 0 ("f") and
+;; 1 ("u"); FPSR_REG is also set, through UNSPEC_C2_FLAG.
+(define_insn "fpremxf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+                    (match_operand:XF 3 "register_operand" "1")]
+                   UNSPEC_FPREM_F))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2) (match_dup 3)]
+                   UNSPEC_FPREM_U))
+   (set (reg:CCFP FPSR_REG)
+        (unspec:CCFP [(match_dup 2) (match_dup 3)]
+                     UNSPEC_C2_FLAG))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fprem"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode fmod: copies both inputs into fresh pseudos and emits fprem
+;; inside a label/jump loop.
+(define_expand "fmodxf3"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "general_operand"))
+   (use (match_operand:XF 2 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_move_insn (op2, operands[2]);
+  emit_move_insn (op1, operands[1]);
+
+  /* LABEL sits right before fprem so the jump emitted by
+     ix86_emit_fp_unordered_jump can branch back to it; the branch
+     condition is chosen inside that helper.  */
+  emit_label (label);
+  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+;; MODEF fmod: extends both inputs to XFmode, runs the same fprem loop as
+;; fmodxf3, then truncates the XFmode result into operand 0.
+(define_expand "fmod<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))
+   (use (match_operand:MODEF 2 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  /* Generator for the final XFmode -> MODEF truncation, chosen below.  */
+  rtx (*gen_truncxf) (rtx, rtx);
+
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+  emit_label (label);
+  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  /* Truncate the result properly for strict SSE math.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    gen_truncxf = gen_truncxf<mode>2;
+  else
+    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+  emit_insn (gen_truncxf (operands[0], op1));
+  DONE;
+})
+
+;; fprem1.  Same operand layout as fpremxf4_i387, with the
+;; UNSPEC_FPREM1_F and UNSPEC_FPREM1_U unspecs.
+(define_insn "fprem1xf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")
+                    (match_operand:XF 3 "register_operand" "1")]
+                   UNSPEC_FPREM1_F))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2) (match_dup 3)]
+                   UNSPEC_FPREM1_U))
+   (set (reg:CCFP FPSR_REG)
+        (unspec:CCFP [(match_dup 2) (match_dup 3)]
+                     UNSPEC_C2_FLAG))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fprem1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode remainder: the fmodxf3 sequence built around fprem1 instead of
+;; fprem.
+(define_expand "remainderxf3"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "general_operand"))
+   (use (match_operand:XF 2 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_move_insn (op2, operands[2]);
+  emit_move_insn (op1, operands[1]);
+
+  /* LABEL sits right before fprem1 so the jump emitted below can branch
+     back to it.  */
+  emit_label (label);
+  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+;; MODEF remainder: the fmod<mode>3 sequence built around fprem1.
+(define_expand "remainder<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))
+   (use (match_operand:MODEF 2 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  /* Generator for the final XFmode -> MODEF truncation, chosen below.  */
+  rtx (*gen_truncxf) (rtx, rtx);
+
+  rtx label = gen_label_rtx ();
+
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+
+  emit_label (label);
+
+  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
+  ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
+
+  /* Truncate the result properly for strict SSE math.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !TARGET_MIX_SSE_I387)
+    gen_truncxf = gen_truncxf<mode>2;
+  else
+    gen_truncxf = gen_truncxf<mode>2_i387_noop_unspec;
+
+  emit_insn (gen_truncxf (operands[0], op1));
+  DONE;
+})
+
+;; SINCOS iterates over the two unspecs; <sincos> supplies the matching
+;; "sin" / "cos" string used in pattern names and output templates.
+(define_int_iterator SINCOS
+  [UNSPEC_SIN
+   UNSPEC_COS])
+
+(define_int_attr sincos
+  [(UNSPEC_SIN "sin")
+   (UNSPEC_COS "cos")])
+
+;; XFmode fsin / fcos computed in place; requires
+;; flag_unsafe_math_optimizations.
+(define_insn "*<sincos>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   SINCOS))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "f<sincos>"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; fsin / fcos on a MODEF register treated as extended to XFmode; enabled
+;; only when SSE math does not handle the mode or x87/SSE are mixed.
+(define_insn "*<sincos>_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 1 "register_operand" "0"))]
+                   SINCOS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "f<sincos>"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; When sincos pattern is defined, sin and cos builtin functions 
will be
+;; expanded to sincos pattern with one of its outputs left unused.
+;; CSE pass will figure out if two sincos patterns can be combined,
+;; otherwise sincos pattern will be split back to sin or cos pattern,
+;; depending on the unused output.
+
+;; fsincos.  Operand 0 ("f") receives UNSPEC_SINCOS_COS of operand 2 and
+;; operand 1 ("u") receives UNSPEC_SINCOS_SIN of the same input.
+(define_insn "sincosxf3"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+                   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fsincos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; Operand 0 (the cos output) carries a REG_UNUSED note: keep only a
+;; plain UNSPEC_SIN into operand 1.
+(define_split
+  [(set (match_operand:XF 0 "register_operand")
+        (unspec:XF [(match_operand:XF 2 "register_operand")]
+                   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand")
+        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+   && can_create_pseudo_p ()"
+  [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))])
+
+;; Operand 1 (the sin output) carries a REG_UNUSED note: keep only a
+;; plain UNSPEC_COS into operand 0.
+(define_split
+  [(set (match_operand:XF 0 "register_operand")
+        (unspec:XF [(match_operand:XF 2 "register_operand")]
+                   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand")
+        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))])
+
+;; fsincos on a MODEF input treated as extended to XFmode; both outputs
+;; are XFmode.
+(define_insn "sincos_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 2 "register_operand" "0"))]
+                   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fsincos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; float_extend variant of the first split: cos output unused, keep
+;; UNSPEC_SIN into operand 1.
+(define_split
+  [(set (match_operand:XF 0 "register_operand")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 2 "register_operand"))]
+                   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand")
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
+   && can_create_pseudo_p ()"
+  [(set (match_dup 1)
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))])
+
+;; float_extend variant of the second split: sin output unused, keep
+;; UNSPEC_COS into operand 0.
+(define_split
+  [(set (match_operand:XF 0 "register_operand")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 2 "register_operand"))]
+                   UNSPEC_SINCOS_COS))
+   (set (match_operand:XF 1 "register_operand")
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))])
+
+;; MODEF sincos: runs the XFmode insn on operand 2, then truncates the
+;; two XFmode results into operands 0 and 1.
+(define_expand "sincos<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))
+   (use (match_operand:MODEF 2 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_sincos_extend<mode>xf3_i387 (op0, op1, operands[2]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[1], op1));
+  DONE;
+})
+
+;; fptan.  Operand 1 ("u") receives UNSPEC_TAN of operand 2; operand 0
+;; ("f") is set to the constant operand 3, which the condition requires
+;; to make standard_80387_constant_p return 2.
+(define_insn "fptanxf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (match_operand:XF 3 "const_double_operand" "F"))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+                   UNSPEC_TAN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && standard_80387_constant_p (operands[3]) == 2"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; fptan on a MODEF input treated as extended to XFmode.  The constant
+;; output (operands 0 and 3) is MODEF; the tangent output is XFmode.
+(define_insn "fptan_extend<mode>xf4_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (match_operand:MODEF 3 "const_double_operand" "F"))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 2 "register_operand" "0"))]
+                   UNSPEC_TAN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations
+   && standard_80387_constant_p (operands[3]) == 2"
+  "fptan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode tan: the tangent goes to operands[0]; the constant output of
+;; the insn lands in a scratch pseudo that is not used afterwards here.
+(define_expand "tanxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx one = gen_reg_rtx (XFmode);
+  rtx op2 = CONST1_RTX (XFmode); /* fld1 */
+
+  emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2));
+  DONE;
+})
+
+;; MODEF tan: computes the tangent into an XFmode pseudo, then truncates
+;; it into operand 0.
+(define_expand "tan<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx one = gen_reg_rtx (<MODE>mode);
+  rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */
+
+  emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0,
+                                             operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; fpatan.  Operand 1 is tied to the "f" output and operand 2 uses "u";
+;; scratch operand 3 is tied to operand 2 ("=2") and clobbered.
+(define_insn "*fpatanxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+                    (match_operand:XF 2 "register_operand" "u")]
+                   UNSPEC_FPATAN))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fpatan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; fpatan with both inputs being MODEF registers treated as extended to
+;; XFmode.
+(define_insn "fpatan_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 1 "register_operand" "0"))
+                    (float_extend:XF
+                      (match_operand:MODEF 2 "register_operand" "u"))]
+                   UNSPEC_FPATAN))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fpatan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode atan2.  Note the operand order inside the unspec: expander
+;; operand 2 comes first, operand 1 second.
+(define_expand "atan2xf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 2 "register_operand")
+                               (match_operand:XF 1 "register_operand")]
+                              UNSPEC_FPATAN))
+              (clobber (match_scratch:XF 3))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations")
+
+;; MODEF atan2: same operand swap as atan2xf3, followed by truncation of
+;; the XFmode result.
+(define_expand "atan2<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))
+   (use (match_operand:MODEF 2 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode atan: UNSPEC_FPATAN with a fresh pseudo holding 1.0 as its
+;; first input and operand 1 as its second.
+(define_expand "atanxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_dup 2)
+                               (match_operand:XF 1 "register_operand")]
+                              UNSPEC_FPATAN))
+              (clobber (match_scratch:XF 3))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+;; MODEF atan: same construction as atanxf2 with a MODEF 1.0, followed by
+;; truncation of the XFmode result.
+(define_expand "atan<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (<MODE>mode);
+  emit_move_insn (op2, CONST1_RTX (<MODE>mode)); /* fld1 */
+
+  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode asin as an RTL sequence: op2 = x * x, op4 = op3 - op2 with op3
+;; loaded with 1.0, op5 = sqrt (op4), result = UNSPEC_FPATAN [op5, x].
+;; FAILs when optimizing the insn for size.
+(define_expand "asinxf2"
+  [(set (match_dup 2)
+        (mult:XF (match_operand:XF 1 "register_operand")
+                 (match_dup 1)))
+   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+   (set (match_dup 5) (sqrt:XF (match_dup 4)))
+   (parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_dup 5) (match_dup 1)]
+                              UNSPEC_FPATAN))
+              (clobber (match_scratch:XF 6))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  /* Fresh XFmode pseudos for the temporaries, operands 2 through 5.  */
+  for (i = 2; i < 6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+;; MODEF asin: extend to XFmode, expand asinxf2, truncate the result.
+;; FAILs when optimizing the insn for size.
+(define_expand "asin<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_asinxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode acos: the asinxf2 sequence with the two UNSPEC_FPATAN inputs
+;; swapped, i.e. result = UNSPEC_FPATAN [x, op5].
+(define_expand "acosxf2"
+  [(set (match_dup 2)
+        (mult:XF (match_operand:XF 1 "register_operand")
+                 (match_dup 1)))
+   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+   (set (match_dup 5) (sqrt:XF (match_dup 4)))
+   (parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_dup 1) (match_dup 5)]
+                              UNSPEC_FPATAN))
+              (clobber (match_scratch:XF 6))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  /* Fresh XFmode pseudos for the temporaries, operands 2 through 5.  */
+  for (i = 2; i < 6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[3], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+;; MODEF acos: extend to XFmode, expand acosxf2, truncate the result.
+;; FAILs when optimizing the insn for size.
+(define_expand "acos<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_acosxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; fyl2x.  Operand 1 is tied to the "f" output and operand 2 uses "u";
+;; scratch operand 3 is tied to operand 2 ("=2") and clobbered.
+(define_insn "fyl2xxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+                    (match_operand:XF 2 "register_operand" "u")]
+                   UNSPEC_FYL2X))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fyl2x"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; fyl2x whose first input is a MODEF register treated as extended to
+;; XFmode; the second input stays XFmode.
+(define_insn "fyl2x_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 1 "register_operand" "0"))
+                    (match_operand:XF 2 "register_operand" "u")]
+                   UNSPEC_FYL2X))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fyl2x"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode log: UNSPEC_FYL2X of operand 1 and a pseudo loaded with
+;; standard_80387_constant_rtx (4).
+(define_expand "logxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 1 "register_operand")
+                               (match_dup 2)] UNSPEC_FYL2X))
+              (clobber (match_scratch:XF 3))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */
+})
+
+;; MODEF log: same constant as logxf2, computed in XFmode and truncated
+;; into operand 0.
+(define_expand "log<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode log10: UNSPEC_FYL2X of operand 1 and a pseudo loaded with
+;; standard_80387_constant_rtx (3).
+(define_expand "log10xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 1 "register_operand")
+                               (match_dup 2)] UNSPEC_FYL2X))
+              (clobber (match_scratch:XF 3))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */
+})
+
+;; MODEF log10: same constant as log10xf2, computed in XFmode and
+;; truncated into operand 0.
+(define_expand "log10<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode log2: UNSPEC_FYL2X of operand 1 and a pseudo loaded with 1.0.
+(define_expand "log2xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 1 "register_operand")
+                               (match_dup 2)] UNSPEC_FYL2X))
+              (clobber (match_scratch:XF 3))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+})
+
+;; MODEF log2: same constant as log2xf2, computed in XFmode and truncated
+;; into operand 0.
+(define_expand "log2<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; fyl2xp1.  Same operand layout as fyl2xxf3_i387, with UNSPEC_FYL2XP1.
+(define_insn "fyl2xp1xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")
+                    (match_operand:XF 2 "register_operand" "u")]
+                   UNSPEC_FYL2XP1))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fyl2xp1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; fyl2xp1 whose first input is a MODEF register treated as extended to
+;; XFmode.
+(define_insn "fyl2xp1_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 1 "register_operand" "0"))
+                    (match_operand:XF 2 "register_operand" "u")]
+                   UNSPEC_FYL2XP1))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fyl2xp1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode log1p: delegated to ix86_emit_i387_log1p.  FAILs when
+;; optimizing the insn for size.
+(define_expand "log1pxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  ix86_emit_i387_log1p (operands[0], operands[1]);
+  DONE;
+})
+
+;; MODEF log1p: wraps operand 1 in a FLOAT_EXTEND rtx, delegates to
+;; ix86_emit_i387_log1p with an XFmode destination, then truncates.
+;; FAILs when optimizing the insn for size.
+(define_expand "log1p<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+
+  operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
+
+  ix86_emit_i387_log1p (op0, operands[1]);
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; fxtract.  Operand 0 ("f") receives UNSPEC_XTRACT_FRACT of operand 2
+;; and operand 1 ("u") receives UNSPEC_XTRACT_EXP of the same input.
+(define_insn "fxtractxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+                   UNSPEC_XTRACT_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fxtract"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; fxtract on a MODEF input treated as extended to XFmode; both outputs
+;; are XFmode.
+(define_insn "fxtract_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(float_extend:XF
+                      (match_operand:MODEF 2 "register_operand" "0"))]
+                   UNSPEC_XTRACT_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fxtract"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; XFmode logb: operand 0 takes the UNSPEC_XTRACT_EXP output; the
+;; UNSPEC_XTRACT_FRACT output goes to a fresh pseudo (operand 2).
+(define_expand "logbxf2"
+  [(parallel [(set (match_dup 2)
+                   (unspec:XF [(match_operand:XF 1 "register_operand")]
+                              UNSPEC_XTRACT_FRACT))
+              (set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "operands[2] = gen_reg_rtx (XFmode);")
+
+;; MODEF logb: runs the extending fxtract insn and truncates its second
+;; output (op1, the UNSPEC_XTRACT_EXP value) into operand 0.
+(define_expand "logb<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "ilogbxf2"
+  [(use (match_operand:SI 0 
"register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1])); + emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); + DONE; +}) + +(define_expand "ilogb<mode>2" + [(use (match_operand:SI 0 "register_operand")) + (use (match_operand:MODEF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); + emit_insn (gen_fix_truncxfsi2 (operands[0], op1)); + DONE; +}) + +(define_insn "*f2xm1xf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_F2XM1))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "f2xm1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fscalexf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FSCALE_FRACT)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FSCALE_EXP))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fscale" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "expNcorexf3" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand") + (match_operand:XF 2 "register_operand"))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 6) 
(unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7))) + (parallel [(set (match_operand:XF 0 "register_operand") + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_FRACT)) + (set (match_dup 9) + (unspec:XF [(match_dup 8) (match_dup 4)] + UNSPEC_FSCALE_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + int i; + + if (optimize_insn_for_size_p ()) + FAIL; + + for (i = 3; i < 10; i++) + operands[i] = gen_reg_rtx (XFmode); + + emit_move_insn (operands[7], CONST1_RTX (XFmode)); /* fld1 */ +}) + +(define_expand "expxf2" + [(use (match_operand:XF 0 "register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "general_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_expxf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "exp10xf2" + [(use (match_operand:XF 0 "register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], 
op2)); + DONE; +}) + +(define_expand "exp10<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "general_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_exp10xf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "exp2xf2" + [(use (match_operand:XF 0 "register_operand")) + (use (match_operand:XF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx op2; + + if (optimize_insn_for_size_p ()) + FAIL; + + op2 = gen_reg_rtx (XFmode); + emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */ + + emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "exp2<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "general_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_exp2xf2 (op0, op1)); + emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "expm1xf2" + [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand") + (match_dup 2))) + (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT)) + (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4))) + (set (match_dup 9) (float_extend:XF (match_dup 13))) + (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1)) + (parallel [(set (match_dup 7) + 
(unspec:XF [(match_dup 6) (match_dup 4)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 8)
+                   (unspec:XF [(match_dup 6) (match_dup 4)]
+                              UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 10)
+                   (unspec:XF [(match_dup 9) (match_dup 8)]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 11)
+                   (unspec:XF [(match_dup 9) (match_dup 8)]
+                              UNSPEC_FSCALE_EXP))])
+   (set (match_dup 12) (minus:XF (match_dup 10)
+                                 (float_extend:XF (match_dup 13))))
+   (set (match_operand:XF 0 "register_operand")
+        (plus:XF (match_dup 12) (match_dup 7)))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  /* Operands 2..12 are fresh XFmode temporaries for the template above.  */
+  for (i = 2; i < 13; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  /* Operand 13 is the SFmode constant 1.0 placed in the constant pool; the
+     template reads it through float_extend.  */
+  operands[13]
+    = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode))); /* fld1 */
+
+  emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
+})
+
+;; expm1 for the MODEF modes: extend to XFmode, expand expm1xf2, and move the
+;; result back with truncxf<mode>2_i387_noop.  FAILs when optimizing the insn
+;; for size.
+(define_expand "expm1<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_expm1xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode ldexp with an SImode exponent.  The exponent (operand 2) is
+;; converted to XFmode with floatsixf2, then fscalexf4_i387 is emitted with
+;; operand 1 and the converted exponent as inputs.  operands[4] only receives
+;; the second output of fscalexf4_i387 and is not used afterwards.
+(define_expand "ldexpxf3"
+  [(match_operand:XF 0 "register_operand")
+   (match_operand:XF 1 "register_operand")
+   (match_operand:SI 2 "register_operand")]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  operands[3] = gen_reg_rtx (XFmode);
+  operands[4] = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_floatsixf2 (operands[3], operands[2]));
+  emit_insn (gen_fscalexf4_i387 (operands[0], operands[4],
+                                 operands[1], operands[3]));
+  DONE;
+})
+
+;; ldexp for the MODEF modes: extend operand 1 to XFmode, expand ldexpxf3
+;; with the unchanged SImode exponent, and move the result back with
+;; truncxf<mode>2_i387_noop.  FAILs when optimizing the insn for size.
+(define_expand "ldexp<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))
+   (use (match_operand:SI 2 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode scalb: the UNSPEC_FSCALE_FRACT / UNSPEC_FSCALE_EXP pair on operands
+;; 1 and 2.  The UNSPEC_FSCALE_EXP result goes to a fresh XFmode register
+;; (operand 3) created by the preparation code.  FAILs when optimizing the
+;; insn for size.
+(define_expand "scalbxf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 1 "register_operand")
+                               (match_operand:XF 2 "register_operand")]
+                              UNSPEC_FSCALE_FRACT))
+              (set (match_dup 3)
+                   (unspec:XF [(match_dup 1) (match_dup 2)]
+                              UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  operands[3] = gen_reg_rtx (XFmode);
+})
+
+;; scalb for the MODEF modes: extend both inputs to XFmode, expand scalbxf3,
+;; and move the result back with truncxf<mode>2_i387_noop.  FAILs when
+;; optimizing the insn for size.
+(define_expand "scalb<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))
+   (use (match_operand:MODEF 2 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1, op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+  op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_scalbxf3 (op0, op1, op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "significandxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 1 "register_operand")]
+                              UNSPEC_XTRACT_FRACT))
+              (set (match_dup 2)
+ (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "operands[2] = gen_reg_rtx (XFmode);") + +(define_expand "significand<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1])); + emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + DONE; +}) + + +(define_insn "sse4_1_round<mode>2" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_15_operand" "n")] + UNSPEC_ROUND))] + "TARGET_ROUND" + "%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix_extra" "1") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "<MODE>")]) + +(define_insn "rintxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "frndint" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "rint<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "register_operand"))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !flag_trapping_math) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round<mode>2 + (operands[0], operands[1], GEN_INT (ROUND_MXCSR))); + else if (optimize_insn_for_size_p ()) 
+ FAIL; + else + ix86_expand_rint (operands[0], operands[1]); + } + else + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_rintxf2 (op0, op1)); + + emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0)); + } + DONE; +}) + +(define_expand "round<mode>2" + [(match_operand:X87MODEF 0 "register_operand") + (match_operand:X87MODEF 1 "nonimmediate_operand")] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !flag_trapping_math && !flag_rounding_math)" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH + && !flag_trapping_math && !flag_rounding_math) + { + if (TARGET_ROUND) + { + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_expand_round_sse4 (operands[0], operands[1]); + } + else if (TARGET_64BIT || (<MODE>mode != DFmode)) + ix86_expand_round (operands[0], operands[1]); + else + ix86_expand_rounddf_32 (operands[0], operands[1]); + } + else + { + operands[1] = force_reg (<MODE>mode, operands[1]); + ix86_emit_i387_round (operands[0], operands[1]); + } + DONE; +}) + +(define_insn_and_split "*fistdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand") + (unspec:DI [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fistdi2 (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (DImode, SLOT_TEMP); + emit_insn (gen_fistdi2_with_temp (operands[0], operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:XF 1 
"register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_scratch:XF 2 "=&1f"))] + "TARGET_USE_FANCY_MATH_387" + "* return output_fix_trunc (insn, operands, false);" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_insn "fistdi2_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 3 "=&1f,&1f"))] + "TARGET_USE_FANCY_MATH_387" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (unspec:DI [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand")) + (clobber (match_scratch 3))] + "reload_completed" + [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))]) + +(define_split + [(set (match_operand:DI 0 "memory_operand") + (unspec:DI [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST)) + (clobber (match_operand:DI 2 "memory_operand")) + (clobber (match_scratch 3))] + "reload_completed" + [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST)) + (clobber (match_dup 3))])]) + +(define_insn_and_split "*fist<mode>2_1" + [(set (match_operand:SWI24 0 "register_operand") + (unspec:SWI24 [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP); + emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1], + operands[2])); + DONE; +} + [(set_attr "type" "fpspc") + (set_attr "mode" "<MODE>")]) + +(define_insn "fist<mode>2" + [(set (match_operand:SWI24 0 "memory_operand" "=m") + (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387" + "* return 
output_fix_trunc (insn, operands, false);" + [(set_attr "type" "fpspc") + (set_attr "mode" "<MODE>")]) + +(define_insn "fist<mode>2_with_temp" + [(set (match_operand:SWI24 0 "register_operand" "=r") + (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_FIST)) + (clobber (match_operand:SWI24 2 "memory_operand" "=m"))] + "TARGET_USE_FANCY_MATH_387" + "#" + [(set_attr "type" "fpspc") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:SWI24 0 "register_operand") + (unspec:SWI24 [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST)) + (clobber (match_operand:SWI24 2 "memory_operand"))] + "reload_completed" + [(set (match_dup 2) (unspec:SWI24 [(match_dup 1)] UNSPEC_FIST)) + (set (match_dup 0) (match_dup 2))]) + +(define_split + [(set (match_operand:SWI24 0 "memory_operand") + (unspec:SWI24 [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST)) + (clobber (match_operand:SWI24 2 "memory_operand"))] + "reload_completed" + [(set (match_dup 0) (unspec:SWI24 [(match_dup 1)] UNSPEC_FIST))]) + +(define_expand "lrintxf<mode>2" + [(set (match_operand:SWI248x 0 "nonimmediate_operand") + (unspec:SWI248x [(match_operand:XF 1 "register_operand")] + UNSPEC_FIST))] + "TARGET_USE_FANCY_MATH_387") + +(define_expand "lrint<MODEF:mode><SWI48:mode>2" + [(set (match_operand:SWI48 0 "nonimmediate_operand") + (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")] + UNSPEC_FIX_NOTRUNC))] + "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH") + +(define_expand "lround<X87MODEF:mode><SWI248x:mode>2" + [(match_operand:SWI248x 0 "nonimmediate_operand") + (match_operand:X87MODEF 1 "register_operand")] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH + && <SWI248x:MODE>mode != HImode + && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) + && !flag_trapping_math && 
!flag_rounding_math)" +{ + if (optimize_insn_for_size_p ()) + FAIL; + + if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH + && <SWI248x:MODE>mode != HImode + && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) + && !flag_trapping_math && !flag_rounding_math) + ix86_expand_lround (operands[0], operands[1]); + else + ix86_emit_i387_round (operands[0], operands[1]); + DONE; +}) + +(define_int_iterator FRNDINT_ROUNDING + [UNSPEC_FRNDINT_FLOOR + UNSPEC_FRNDINT_CEIL + UNSPEC_FRNDINT_TRUNC]) + +(define_int_iterator FIST_ROUNDING + [UNSPEC_FIST_FLOOR + UNSPEC_FIST_CEIL]) + +;; Base name for define_insn +(define_int_attr rounding_insn + [(UNSPEC_FRNDINT_FLOOR "floor") + (UNSPEC_FRNDINT_CEIL "ceil") + (UNSPEC_FRNDINT_TRUNC "btrunc") + (UNSPEC_FIST_FLOOR "floor") + (UNSPEC_FIST_CEIL "ceil")]) + +(define_int_attr rounding + [(UNSPEC_FRNDINT_FLOOR "floor") + (UNSPEC_FRNDINT_CEIL "ceil") + (UNSPEC_FRNDINT_TRUNC "trunc") + (UNSPEC_FIST_FLOOR "floor") + (UNSPEC_FIST_CEIL "ceil")]) + +(define_int_attr ROUNDING + [(UNSPEC_FRNDINT_FLOOR "FLOOR") + (UNSPEC_FRNDINT_CEIL "CEIL") + (UNSPEC_FRNDINT_TRUNC "TRUNC") + (UNSPEC_FIST_FLOOR "FLOOR") + (UNSPEC_FIST_CEIL "CEIL")]) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+;; Placeholder pattern for an XFmode round-to-integer using one of the
+;; FRNDINT_ROUNDING unspecs.  It only matches while pseudos can still be
+;; created (can_create_pseudo_p) and is always split ("&& 1").  The split
+;; code sets the I387_<ROUNDING> entry of ix86_optimize_mode_switching,
+;; obtains two HImode stack slots (SLOT_CW_STORED and SLOT_CW_<ROUNDING>)
+;; and emits frndintxf2_<rounding>_i387 with them as operands 2 and 3.
+(define_insn_and_split "frndintxf2_<rounding>"
+  [(set (match_operand:XF 0 "register_operand")
+        (unspec:XF [(match_operand:XF 1 "register_operand")]
+                   FRNDINT_ROUNDING))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
+
+  emit_insn (gen_frndintxf2_<rounding>_i387 (operands[0], operands[1],
+                                             operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "XF")])
+
+;; The emitted instruction sequence: load the control word from operand 3,
+;; execute frndint, then load the control word from operand 2.  The splitter
+;; of frndintxf2_<rounding> passes the SLOT_CW_<ROUNDING> slot as operand 3
+;; and the SLOT_CW_STORED slot as operand 2.  The input is tied to the
+;; output register (constraint "0").
+(define_insn "frndintxf2_<rounding>_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+                   FRNDINT_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "XF")])
+
+;; Named XFmode expander; the name comes from the rounding_insn attribute of
+;; the FRNDINT_ROUNDING iterator.  There is no preparation code, so the
+;; template is emitted as written.  Not available when the insn is being
+;; optimized for size.
+(define_expand "<rounding_insn>xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand")
+                   (unspec:XF [(match_operand:XF 1 "register_operand")]
+                              FRNDINT_ROUNDING))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !optimize_insn_for_size_p ()")
+
+;; MODEF variant.  The preparation code picks one of two strategies, using
+;; the same tests that appear in the condition string:
+;;  - SSE math without trapping math: emit sse4_1_round<mode>2 when
+;;    TARGET_ROUND; otherwise FAIL when optimizing for size, else call one of
+;;    the ix86_expand_* helpers (the *_32 helpers only for DFmode when
+;;    !TARGET_64BIT).
+;;  - otherwise (x87): FAIL when optimizing for size, else extend to XFmode,
+;;    emit frndintxf2_<rounding>, and move the result back with
+;;    truncxf<mode>2_i387_noop.
+;; The template in the pattern is never emitted: every path ends in FAIL or
+;; DONE.
+(define_expand "<rounding_insn><mode>2"
+  [(parallel [(set (match_operand:MODEF 0 "register_operand")
+                   (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
+                                 FRNDINT_ROUNDING))
+              (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+        || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    {
+      if (TARGET_ROUND)
+        emit_insn (gen_sse4_1_round<mode>2
+                   (operands[0], operands[1], GEN_INT (ROUND_<ROUNDING>)));
+      else if (optimize_insn_for_size_p ())
+        FAIL;
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
+        {
+          if (ROUND_<ROUNDING> == ROUND_FLOOR)
+            ix86_expand_floorceil (operands[0], operands[1], true);
+          else if (ROUND_<ROUNDING> == ROUND_CEIL)
+            ix86_expand_floorceil (operands[0], operands[1], false);
+          else if (ROUND_<ROUNDING> == ROUND_TRUNC)
+            ix86_expand_trunc (operands[0], operands[1]);
+          else
+            gcc_unreachable ();
+        }
+      else
+        {
+          /* Only DFmode on a !TARGET_64BIT target reaches this branch.  */
+          if (ROUND_<ROUNDING> == ROUND_FLOOR)
+            ix86_expand_floorceildf_32 (operands[0], operands[1], true);
+          else if (ROUND_<ROUNDING> == ROUND_CEIL)
+            ix86_expand_floorceildf_32 (operands[0], operands[1], false);
+          else if (ROUND_<ROUNDING> == ROUND_TRUNC)
+            ix86_expand_truncdf_32 (operands[0], operands[1]);
+          else
+            gcc_unreachable ();
+        }
+    }
+  else
+    {
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+        FAIL;
+
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_<rounding> (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_mask_pm" + [(set (match_operand:XF 0 "register_operand") + (unspec:XF [(match_operand:XF 1 "register_operand")] + UNSPEC_FRNDINT_MASK_PM)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_MASK_PM] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM); + + emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_mask_pm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_MASK_PM)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_expand "nearbyintxf2" + [(parallel [(set (match_operand:XF 0 "register_operand") + (unspec:XF [(match_operand:XF 1 "register_operand")] + UNSPEC_FRNDINT_MASK_PM)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations") + +(define_expand "nearbyint<mode>2" + [(use (match_operand:MODEF 0 "register_operand")) + (use (match_operand:MODEF 1 "register_operand"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extend<mode>xf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_mask_pm (op0, op1)); + + emit_insn (gen_truncxf<mode>2_i387_noop 
(operands[0], op0));
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+;;
+;; Placeholder pattern (output template "#") that is split unconditionally
+;; ("&& 1") while pseudos can still be created.  The split code sets
+;; ix86_optimize_mode_switching[I387_<ROUNDING>], allocates the two HImode
+;; stack slots SLOT_CW_STORED and SLOT_CW_<ROUNDING>, and then emits either
+;; fist<mode>2_<rounding> (operand 0 is memory) or the _with_temp variant
+;; with an extra SLOT_TEMP slot (operand 0 is anything else).
+(define_insn_and_split "*fist<mode>2_<rounding>_1"
+  [(set (match_operand:SWI248x 0 "nonimmediate_operand")
+        (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
+                        FIST_ROUNDING))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_<ROUNDING>] = 1;
+
+  /* Operands 2 and 3 become the two (use ...) HImode memory operands
+     of the fist patterns emitted below.  */
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_<ROUNDING>);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_<rounding> (operands[0], operands[1],
+                                           operands[2], operands[3]));
+  else
+    {
+      /* Non-memory destination: pass a stack temporary as operand 4.  */
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_<rounding>_with_temp
+                 (operands[0], operands[1], operands[2],
+                  operands[3], operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "<MODE>")])
+
+;; DImode form storing straight to memory.  Operands 2 and 3 are HImode
+;; memory operands that are only used; an XF scratch register is
+;; clobbered.  The assembly text is produced by output_fix_trunc.
+(define_insn "fistdi2_<rounding>"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+                   FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, false);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "DI")])
+
+;; DImode form that also accepts a register destination (second
+;; alternative, disparaged with "?").  In that alternative operand 4 is a
+;; DImode memory temporary.  Never output directly ("#"); the two
+;; define_splits below rewrite it after reload.
+(define_insn "fistdi2_<rounding>_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+        (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+                   FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "DI")])
+
+;; Register destination, after reload: convert into the memory temporary
+;; (operand 4), then copy the temporary into operand 0.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+        (unspec:DI [(match_operand:XF 1 "register_operand")]
+                   FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:DI 4 "memory_operand"))
+   (clobber (match_scratch 5))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4)
+                   (unspec:DI [(match_dup 1)] FIST_ROUNDING))
+              (use (match_dup 2))
+              (use (match_dup 3))
+              (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))])
+
+;; Memory destination, after reload: store directly into operand 0; the
+;; memory temporary (operand 4) is dropped from the resulting pattern.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand")
+        (unspec:DI [(match_operand:XF 1 "register_operand")]
+                   FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:DI 4 "memory_operand"))
+   (clobber (match_scratch 5))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (unspec:DI [(match_dup 1)] FIST_ROUNDING))
+              (use (match_dup 2))
+              (use (match_dup 3))
+              (clobber (match_dup 5))])])
+
+;; SWI24 counterpart of fistdi2_<rounding>: memory destination, two used
+;; HImode memory operands, no XF scratch.
+(define_insn "fist<mode>2_<rounding>"
+  [(set (match_operand:SWI24 0 "memory_operand" "=m")
+        (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f")]
+                      FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, false);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "<MODE>")])
+
+;; SWI24 counterpart of fistdi2_<rounding>_with_temp; always split ("#")
+;; by the two define_splits below.
+(define_insn "fist<mode>2_<rounding>_with_temp"
+  [(set (match_operand:SWI24 0 "nonimmediate_operand" "=m,?r")
+        (unspec:SWI24 [(match_operand:XF 1 "register_operand" "f,f")]
+                      FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:SWI24 4 "memory_operand" "=X,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "<rounding>")
+   (set_attr "mode" "<MODE>")])
+
+;; Register destination, after reload: convert into the memory temporary
+;; (operand 4), then copy it into operand 0.
+(define_split
+  [(set (match_operand:SWI24 0 "register_operand")
+        (unspec:SWI24 [(match_operand:XF 1 "register_operand")]
+                      FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:SWI24 4 "memory_operand"))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4)
+                   (unspec:SWI24 [(match_dup 1)] FIST_ROUNDING))
+              (use (match_dup 2))
+              (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))])
+
+;; Memory destination, after reload: store directly; the temporary is
+;; dropped from the resulting pattern.
+(define_split
+  [(set (match_operand:SWI24 0 "memory_operand")
+        (unspec:SWI24 [(match_operand:XF 1 "register_operand")]
+                      FIST_ROUNDING))
+   (use (match_operand:HI 2 "memory_operand"))
+   (use (match_operand:HI 3 "memory_operand"))
+   (clobber (match_operand:SWI24 4 "memory_operand"))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0)
+                   (unspec:SWI24 [(match_dup 1)] FIST_ROUNDING))
+              (use (match_dup 2))
+              (use (match_dup 3))])])
+
+;; Named expander for an XFmode source.  It has no preparation code, so
+;; the pattern itself (FIST_ROUNDING unspec plus FLAGS_REG clobber) is what
+;; gets emitted.
+(define_expand "l<rounding_insn>xf<mode>2"
+  [(parallel [(set (match_operand:SWI248x 0 "nonimmediate_operand")
+                   (unspec:SWI248x [(match_operand:XF 1 "register_operand")]
+                                   FIST_ROUNDING))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations")
+
+;; Named expander for SSE math on MODEF sources.  FAILs on 64-bit targets
+;; when optimizing for size; otherwise hands off to ix86_expand_lfloorceil,
+;; passing true for ROUND_FLOOR and false for ROUND_CEIL.
+(define_expand "l<rounding_insn><MODEF:mode><SWI48:mode>2"
+  [(parallel [(set (match_operand:SWI48 0 "nonimmediate_operand")
+                   (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")]
+                                 FIST_ROUNDING))
+              (clobber (reg:CC FLAGS_REG))])]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  if (TARGET_64BIT && optimize_insn_for_size_p ())
+    FAIL;
+
+  if (ROUND_<ROUNDING> == ROUND_FLOOR)
+    ix86_expand_lfloorceil (operands[0], operands[1], true);
+  else if (ROUND_<ROUNDING> == ROUND_CEIL)
+    ix86_expand_lfloorceil (operands[0], operands[1], false);
+  else
+    /* Only the floor and ceil roundings are handled here.  */
+    gcc_unreachable ();
+
+  DONE;
+})
+
+;; Emit "fxam" followed by "fnstsw" into operand 0 (HImode, constraint
+;; "a") for an x87 register input.
+(define_insn "fxam<mode>2_i387"
+  [(set (match_operand:HI 0 "register_operand" "=a")
+        (unspec:HI
+          [(match_operand:X87MODEF 1 "register_operand" "f")]
+          UNSPEC_FXAM))]
+  "TARGET_USE_FANCY_MATH_387"
+  "fxam\n\tfnstsw\t%0"
+  [(set_attr "type" "multi")
+   (set_attr "length" "4")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
+
+;; Memory-input variant.  Split unconditionally ("&& 1") into a load of
+;; operand 1 into a fresh pseudo (operand 2) followed by the register
+;; UNSPEC_FXAM form.  The split code also sets MEM_VOLATILE_P on the
+;; memory operand.
+;; NOTE(review): presumably this keeps the load through memory from being
+;; optimized away -- confirm.
+(define_insn_and_split "fxam<mode>2_i387_with_temp"
+  [(set (match_operand:HI 0 "register_operand")
+        (unspec:HI
+          [(match_operand:MODEF 1 "memory_operand")]
+          UNSPEC_FXAM_MEM))]
+  "TARGET_USE_FANCY_MATH_387
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 2)(match_dup 1))
+   (set (match_dup 0)
+        (unspec:HI [(match_dup 2)] UNSPEC_FXAM))]
+{
+  operands[2] = gen_reg_rtx (<MODE>mode);
+
+  MEM_VOLATILE_P (operands[1]) = 1;
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
+
+;; isinf for XFmode: run fxam into an HImode scratch, mask it with 0x45
+;; (gen_andqi_ext_0), compare with 0x05 (gen_cmpqi_ext_3), materialize the
+;; EQ result of the flags in a QImode pseudo and zero-extend it into
+;; operand 0.
+;; NOTE(review): 0x45 / 0x05 look like the C3|C2|C0 condition bits in the
+;; high byte of the x87 status word and the FXAM "infinity" class --
+;; confirm against the FXAM documentation.
+(define_expand "isinfxf2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && ix86_libc_has_function (function_c99_misc)"
+{
+  rtx mask = GEN_INT (0x45);
+  rtx val = GEN_INT (0x05);
+
+  rtx cond;
+
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx res = gen_reg_rtx (QImode);
+
+  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+
+  emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+  emit_insn (gen_cmpqi_ext_3 (scratch, val));
+  /* res = (flags == 0), i.e. the masked value matched VAL.  */
+  cond = gen_rtx_fmt_ee (EQ, QImode,
+                         gen_rtx_REG (CCmode, FLAGS_REG),
+                         const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+  emit_insn (gen_zero_extendqisi2 (operands[0], res));
+  DONE;
+})
+
+;; isinf for MODEF values; disabled when the mode is handled by SSE math.
+;; Same mask-and-compare sequence as isinfxf2, but the input always
+;; reaches fxam via the memory-operand pattern (copied to a SLOT_TEMP
+;; stack slot first when operand 1 is not already memory).
+(define_expand "isinf<mode>2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:MODEF 1 "nonimmediate_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && ix86_libc_has_function (function_c99_misc)
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  rtx mask = GEN_INT (0x45);
+  rtx val = GEN_INT (0x05);
+
+  rtx cond;
+
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx res = gen_reg_rtx (QImode);
+
+  /* Remove excess precision by forcing value through memory.  */
+  if (memory_operand (operands[1], VOIDmode))
+    emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, operands[1]));
+  else
+    {
+      rtx temp = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+
+      emit_move_insn (temp, operands[1]);
+      emit_insn (gen_fxam<mode>2_i387_with_temp (scratch, temp));
+    }
+
+  emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+  emit_insn (gen_cmpqi_ext_3 (scratch, val));
+  cond = gen_rtx_fmt_ee (EQ, QImode,
+                         gen_rtx_REG (CCmode, FLAGS_REG),
+                         const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+  emit_insn (gen_zero_extendqisi2 (operands[0], res));
+  DONE;
+})
+
+;; signbit for XFmode: operand 0 = (fxam status word, viewed in SImode)
+;; & 0x200.
+;; NOTE(review): 0x200 is presumably the C1 condition bit, which FXAM
+;; loads with the sign of the value -- confirm.
+(define_expand "signbitxf2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387"
+{
+  rtx scratch = gen_reg_rtx (HImode);
+
+  emit_insn (gen_fxamxf2_i387 (scratch, operands[1]));
+  emit_insn (gen_andsi3 (operands[0],
+             gen_lowpart (SImode, scratch), GEN_INT (0x200)));
+  DONE;
+})
+
+;; movmskpd from an SSE register ("x") into a general register ("r");
+;; the "prefix" attribute is maybe_vex.
+(define_insn "movmsk_df"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI
+          [(match_operand:DF 1 "register_operand" "x")]
+          UNSPEC_MOVMSK))]
+  "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH"
+  "%vmovmskpd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DF")])
+
+;; Use movmskpd in SSE mode to avoid store forwarding stall
+;; for 32bit targets and movq+shrq sequence for 64bit targets.
+;; signbit for DFmode.  With SSE math the result is the movmsk_df value
+;; masked down to bit 0; otherwise it is the fxam status word (viewed in
+;; SImode) masked with 0x200.
+;; NOTE(review): 0x200 is presumably the x87 C1 condition bit -- confirm.
+(define_expand "signbitdf2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:DF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH)"
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+
+  if (!(SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH))
+    {
+      /* x87 path: classify with fxam and keep only bit 0x200.  */
+      rtx fpsw = gen_reg_rtx (HImode);
+
+      emit_insn (gen_fxamdf2_i387 (fpsw, src));
+      emit_insn (gen_andsi3 (dest, gen_lowpart (SImode, fpsw),
+                             GEN_INT (0x200)));
+      DONE;
+    }
+
+  /* SSE path: movmskpd, then keep only the lowest mask bit.  */
+  emit_insn (gen_movmsk_df (dest, src));
+  emit_insn (gen_andsi3 (dest, dest, const1_rtx));
+  DONE;
+})
+
+;; signbit for SFmode on the x87 only (disabled when SFmode is handled by
+;; SSE math): fxam status word masked with 0x200, as for DFmode above.
+(define_expand "signbitsf2"
+  [(use (match_operand:SI 0 "register_operand"))
+   (use (match_operand:SF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
+{
+  rtx fpsw = gen_reg_rtx (HImode);
+
+  emit_insn (gen_fxamsf2_i387 (fpsw, operands[1]));
+  emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, fpsw),
+                         GEN_INT (0x200)));
+  DONE;
+})
+
+;; Block operation instructions
+
+;; The "cld" instruction, modelled as an unspec_volatile.
+(define_insn "cld"
+  [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
+  ""
+  "cld"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
+;; Block move expander.  All the work is done by
+;; ix86_expand_set_or_movmem, called with a NULL fourth argument and a
+;; final argument of false; the expansion FAILs when that helper returns
+;; zero.
+(define_expand "movmem<mode>"
+  [(use (match_operand:BLK 0 "memory_operand"))
+   (use (match_operand:BLK 1 "memory_operand"))
+   (use (match_operand:SWI48 2 "nonmemory_operand"))
+   (use (match_operand:SWI48 3 "const_int_operand"))
+   (use (match_operand:SI 4 "const_int_operand"))
+   (use (match_operand:SI 5 "const_int_operand"))
+   (use (match_operand:SI 6 ""))
+   (use (match_operand:SI 7 ""))
+   (use (match_operand:SI 8 ""))]
+  ""
+{
+  if (!ix86_expand_set_or_movmem (operands[0], operands[1],
+                                  operands[2], NULL, operands[3],
+                                  operands[4], operands[5],
+                                  operands[6], operands[7],
+                                  operands[8], false))
+    FAIL;
+
+  DONE;
+})
+
+;; Most CPUs don't like single string operations
+;; Handle this case here to simplify previous expander.
+
+;; Move one element from memory operand 3 to memory operand 1 and advance
+;; the pointer registers operand 0 and operand 2 by the element size (the
+;; mode size of operand 1).  When TARGET_SINGLE_STRINGOP is set or the insn
+;; is optimized for size, and neither SI nor DI is a fixed register, the
+;; strmov_singleop form is emitted instead.  Otherwise the pattern itself
+;; is emitted: load into a fresh pseudo (operand 4), store, and two
+;; pointer additions.
+(define_expand "strmov"
+  [(set (match_dup 4) (match_operand 3 "memory_operand"))
+   (set (match_operand 1 "memory_operand") (match_dup 4))
+   (parallel [(set (match_operand 0 "register_operand") (match_dup 5))
+              (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_operand 2 "register_operand") (match_dup 6))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1])));
+
+  /* If .md ever supports :P for Pmode, these can be directly
+     in the pattern above.  */
+  operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
+  operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
+
+  /* Can't use this if the user has appropriated esi or edi.  */
+  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+      && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+    {
+      emit_insn (gen_strmov_singleop (operands[0], operands[1],
+                                      operands[2], operands[3],
+                                      operands[5], operands[6]));
+      DONE;
+    }
+
+  /* Fallback: the element travels through a new pseudo register.  */
+  operands[4] = gen_reg_rtx (GET_MODE (operands[1]));
+})
+
+;; Single-instruction form: one parallel holding the memory move and both
+;; pointer updates.  The preparation statement sets
+;; ix86_current_function_needs_cld.
+(define_expand "strmov_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand")
+                   (match_operand 3 "memory_operand"))
+              (set (match_operand 0 "register_operand")
+                   (match_operand 4))
+              (set (match_operand 2 "register_operand")
+                   (match_operand 5))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; movsq: copy a DImode element from [operand 3] to [operand 2].  The
+;; updated pointers are produced in operand 0 ("=D", tied to operand 2)
+;; and operand 1 ("=S", tied to operand 3), each advanced by 8.
+(define_insn "*strmovdi_rex_1"
+  [(set (mem:DI (match_operand:P 2 "register_operand" "0"))
+        (mem:DI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 2)
+                (const_int 8)))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (match_dup 3)
+                (const_int 8)))]
+  "TARGET_64BIT
+   && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^movsq"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "DI")])
+
+;; movsl/movsd: SImode element, pointers advanced by 4.
+(define_insn "*strmovsi_1"
+  [(set (mem:SI (match_operand:P 2 "register_operand" "0"))
+        (mem:SI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 2)
+                (const_int 4)))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (match_dup 3)
+                (const_int 4)))]
+  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^movs{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+;; movsw: HImode element, pointers advanced by 2.
+(define_insn "*strmovhi_1"
+  [(set (mem:HI (match_operand:P 2 "register_operand" "0"))
+        (mem:HI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 2)
+                (const_int 2)))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (match_dup 3)
+                (const_int 2)))]
+  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^movsw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set_attr "mode" "HI")])
+
+;; movsb: QImode element, pointers advanced by 1.  prefix_rex is forced
+;; to "0" when the pointer mode is DImode.
+(define_insn "*strmovqi_1"
+  [(set (mem:QI (match_operand:P 2 "register_operand" "0"))
+        (mem:QI (match_operand:P 3 "register_operand" "1")))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 2)
+                (const_int 1)))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (match_dup 3)
+                (const_int 1)))]
+  "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^movsb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "both")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<P:MODE>mode == DImode")
+          (const_string "0")
+          (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; Repeated block move as a single parallel: the count register
+;; (operand 4) is set to zero, both pointer registers receive their final
+;; values (operands 5 and 6), and memory operand 3 is copied to memory
+;; operand 1.  The preparation statement sets
+;; ix86_current_function_needs_cld.
+(define_expand "rep_mov"
+  [(parallel [(set (match_operand 4 "register_operand") (const_int 0))
+              (set (match_operand 0 "register_operand")
+                   (match_operand 5))
+              (set (match_operand 2 "register_operand")
+                   (match_operand 6))
+              (set (match_operand 1 "memory_operand")
+                   (match_operand 3 "memory_operand"))
+              (use (match_dup 4))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; rep movsq.  Operand 5 is the incoming count, tied to the "=c" output
+;; (operand 2) that ends up zero.  The final destination and source
+;; pointers are the incoming pointers (operands 3 and 4) plus count << 3.
+(define_insn "*rep_movdi_rex64"
+  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
+                          (const_int 3))
+                (match_operand:P 3 "register_operand" "0")))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (ashift:P (match_dup 5) (const_int 3))
+                (match_operand:P 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+        (mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "TARGET_64BIT
+   && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^rep{%;} movsq"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "DI")])
+
+;; rep movsl/movsd: as above with the count scaled by 4 (shift of 2).
+(define_insn "*rep_movsi"
+  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 5 "register_operand" "2")
+                          (const_int 2))
+                (match_operand:P 3 "register_operand" "0")))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (ashift:P (match_dup 5) (const_int 2))
+                (match_operand:P 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+        (mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^rep{%;} movs{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "SI")])
+
+;; rep movsb: the count is added to the pointers unscaled.
+(define_insn "*rep_movqi"
+  [(set (match_operand:P 2 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_operand:P 3 "register_operand" "0")
+                (match_operand:P 5 "register_operand" "2")))
+   (set (match_operand:P 1 "register_operand" "=S")
+        (plus:P (match_operand:P 4 "register_operand" "1") (match_dup 5)))
+   (set (mem:BLK (match_dup 3))
+        (mem:BLK (match_dup 4)))
+   (use (match_dup 5))]
+  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^rep{%;} movsb"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "both")
+   (set_attr "mode" "QI")])
+
+;; Block set expander.  All the work is done by
+;; ix86_expand_set_or_movmem, called with a NULL second argument and a
+;; final argument of true; the expansion FAILs when that helper returns
+;; zero.
+(define_expand "setmem<mode>"
+  [(use (match_operand:BLK 0 "memory_operand"))
+   (use (match_operand:SWI48 1 "nonmemory_operand"))
+   (use (match_operand:QI 2 "nonmemory_operand"))
+   (use (match_operand 3 "const_int_operand"))
+   (use (match_operand:SI 4 "const_int_operand"))
+   (use (match_operand:SI 5 "const_int_operand"))
+   (use (match_operand:SI 6 ""))
+   (use (match_operand:SI 7 ""))
+   (use (match_operand:SI 8 ""))]
+  ""
+{
+  if (ix86_expand_set_or_movmem (operands[0], NULL,
+                                 operands[1], operands[2],
+                                 operands[3], operands[4],
+                                 operands[5], operands[6],
+                                 operands[7], operands[8], true))
+    DONE;
+  else
+    FAIL;
+})
+
+;; Most CPUs don't like single string operations
+;; Handle this case here to simplify previous expander.
+
+;; Store register operand 2 into memory operand 1 and advance the pointer
+;; register operand 0 by the mode size of operand 2.  The memory operand is
+;; re-moded to match the register when their modes differ.  When
+;; TARGET_SINGLE_STRINGOP is set or the insn is optimized for size, and
+;; neither AX nor DI is a fixed register, strset_singleop is emitted;
+;; otherwise the pattern itself (store plus pointer addition) is emitted.
+(define_expand "strset"
+  [(set (match_operand 1 "memory_operand")
+        (match_operand 2 "register_operand"))
+   (parallel [(set (match_operand 0 "register_operand")
+                   (match_dup 3))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (GET_MODE (operands[1]) != GET_MODE (operands[2]))
+    operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0);
+
+  /* If .md ever supports :P for Pmode, this can be directly
+     in the pattern above.  */
+  operands[3] = gen_rtx_PLUS (Pmode, operands[0],
+                              GEN_INT (GET_MODE_SIZE (GET_MODE
+                                                      (operands[2]))));
+  /* Can't use this if the user has appropriated eax or edi.  */
+  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+      && !(fixed_regs[AX_REG] || fixed_regs[DI_REG]))
+    {
+      emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
+                                      operands[3]));
+      DONE;
+    }
+})
+
+;; Single-instruction store form: memory store, pointer update and an
+;; UNSPEC_STOS marker in one parallel.  The preparation statement sets
+;; ix86_current_function_needs_cld.
+(define_expand "strset_singleop"
+  [(parallel [(set (match_operand 1 "memory_operand")
+                   (match_operand 2 "register_operand"))
+              (set (match_operand 0 "register_operand")
+                   (match_operand 3))
+              (unspec [(const_int 0)] UNSPEC_STOS)])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; stosq: store the DImode value in operand 2 ("a") at [operand 1]; the
+;; updated pointer is produced in operand 0 ("=D", tied to operand 1),
+;; advanced by 8.
+(define_insn "*strsetdi_rex_1"
+  [(set (mem:DI (match_operand:P 1 "register_operand" "0"))
+        (match_operand:DI 2 "register_operand" "a"))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 1)
+                (const_int 8)))
+   (unspec [(const_int 0)] UNSPEC_STOS)]
+  "TARGET_64BIT
+   && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+  "%^stosq"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+;; stosl/stosd: SImode value, pointer advanced by 4.
+(define_insn "*strsetsi_1"
+  [(set (mem:SI (match_operand:P 1 "register_operand" "0"))
+        (match_operand:SI 2 "register_operand" "a"))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 1)
+                (const_int 4)))
+   (unspec [(const_int 0)] UNSPEC_STOS)]
+  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+  "%^stos{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+;; stosw: HImode value, pointer advanced by 2.
+(define_insn "*strsethi_1"
+  [(set (mem:HI (match_operand:P 1 "register_operand" "0"))
+        (match_operand:HI 2 "register_operand" "a"))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 1)
+                (const_int 2)))
+   (unspec [(const_int 0)] UNSPEC_STOS)]
+  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+  "%^stosw"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set_attr "mode" "HI")])
+
+;; stosb: QImode value, pointer advanced by 1.  prefix_rex is forced to
+;; "0" when the pointer mode is DImode.
+(define_insn "*strsetqi_1"
+  [(set (mem:QI (match_operand:P 1 "register_operand" "0"))
+        (match_operand:QI 2 "register_operand" "a"))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_dup 1)
+                (const_int 1)))
+   (unspec [(const_int 0)] UNSPEC_STOS)]
+  "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])"
+  "%^stosb"
+  [(set_attr "type" "str")
+   (set_attr "memory" "store")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<P:MODE>mode == DImode")
+          (const_string "0")
+          (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; Repeated block store as a single parallel: the count register
+;; (operand 1) is set to zero, the pointer register (operand 0) receives
+;; its final value (operand 4), memory operand 2 is written, and the value
+;; register (operand 3) is used.  The preparation statement sets
+;; ix86_current_function_needs_cld.
+(define_expand "rep_stos"
+  [(parallel [(set (match_operand 1 "register_operand") (const_int 0))
+              (set (match_operand 0 "register_operand")
+                   (match_operand 4))
+              (set (match_operand 2 "memory_operand") (const_int 0))
+              (use (match_operand 3 "register_operand"))
+              (use (match_dup 1))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; rep stosq.  Operand 4 is the incoming count, tied to the "=c" output
+;; (operand 1) that ends up zero.  The final pointer is the incoming
+;; pointer (operand 3) plus count << 3.  Operand 2 ("a") is the value
+;; register.
+(define_insn "*rep_stosdi_rex64"
+  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
+                          (const_int 3))
+                 (match_operand:P 3 "register_operand" "0")))
+   (set (mem:BLK (match_dup 3))
+        (const_int 0))
+   (use (match_operand:DI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "TARGET_64BIT
+   && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+  "%^rep{%;} stosq"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+;; rep stosl/stosd: as above with the count scaled by 4 (shift of 2).
+(define_insn "*rep_stossi"
+  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (ashift:P (match_operand:P 4 "register_operand" "1")
+                          (const_int 2))
+                 (match_operand:P 3 "register_operand" "0")))
+   (set (mem:BLK (match_dup 3))
+        (const_int 0))
+   (use (match_operand:SI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+  "%^rep{%;} stos{l|d}"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set_attr "mode" "SI")])
+
+;; rep stosb: the count is added to the pointer unscaled.  prefix_rex is
+;; forced to "0" when the pointer mode is DImode.
+(define_insn "*rep_stosqi"
+  [(set (match_operand:P 1 "register_operand" "=c") (const_int 0))
+   (set (match_operand:P 0 "register_operand" "=D")
+        (plus:P (match_operand:P 3 "register_operand" "0")
+                (match_operand:P 4 "register_operand" "1")))
+   (set (mem:BLK (match_dup 3))
+        (const_int 0))
+   (use (match_operand:QI 2 "register_operand" "a"))
+   (use (match_dup 4))]
+  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+  "%^rep{%;} stosb"
+  [(set_attr "type" "str")
+   (set_attr "prefix_rep" "1")
+   (set_attr "memory" "store")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<P:MODE>mode == DImode")
+          (const_string "0")
+          (const_string "*")))
+   (set_attr "mode" "QI")])
+
+;; Bounded block compare.  Operands 1 and 2 are the BLKmode memory
+;; blocks, operand 3 the count and operand 4 the alignment; operand 0
+;; receives the SImode result.  FAILs when optimizing for size without
+;; TARGET_INLINE_ALL_STRINGOPS, or when CX, SI or DI is a fixed register.
+(define_expand "cmpstrnsi"
+  [(set (match_operand:SI 0 "register_operand")
+        (compare:SI (match_operand:BLK 1 "general_operand")
+                    (match_operand:BLK 2 "general_operand")))
+   (use (match_operand 3 "general_operand"))
+   (use (match_operand 4 "immediate_operand"))]
+  ""
+{
+  rtx addr1, addr2, out, outlow, count, countreg, align;
+
+  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+    FAIL;
+
+  /* Can't use this if the user has appropriated ecx, esi or edi.  */
+  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
+    FAIL;
+
+  /* Build the result in OUT; use a fresh SImode pseudo when operand 0
+     is not a plain register.  */
+  out = operands[0];
+  if (!REG_P (out))
+    out = gen_reg_rtx (SImode);
+
+  /* Copy both block addresses into registers and make the MEMs refer
+     to those registers.  */
+  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
+  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
+  if (addr1 != XEXP (operands[1], 0))
+    operands[1] = replace_equiv_address_nv (operands[1], addr1);
+  if (addr2 != XEXP (operands[2], 0))
+    operands[2] = replace_equiv_address_nv (operands[2], addr2);
+
+  count = operands[3];
+  countreg = ix86_zero_extend_to_Pmode (count);
+
+  /* %%% Iff we are testing strict equality, we can use known alignment
+     to good advantage.  This may be possible with combine, particularly
+     once cc0 is dead.  */
+  align = operands[4];
+
+  if (CONST_INT_P (count))
+    {
+      /* A constant zero length yields 0 without any string insn.  */
+      if (INTVAL (count) == 0)
+        {
+          emit_move_insn (operands[0], const0_rtx);
+          DONE;
+        }
+      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
+                                     operands[1], operands[2]));
+    }
+  else
+    {
+      rtx (*gen_cmp) (rtx, rtx);
+
+      gen_cmp = (TARGET_64BIT
+                 ? gen_cmpdi_1 : gen_cmpsi_1);
+
+      /* Compare COUNTREG with itself before the string compare, so the
+         flags that cmpstrnqi_1 uses read "equal" on entry.  */
+      emit_insn (gen_cmp (countreg, countreg));
+      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
+                                  operands[1], operands[2]));
+    }
+
+  /* Convert the flags to a value in the low byte of OUT, then
+     sign-extend it to SImode.  */
+  outlow = gen_lowpart (QImode, out);
+  emit_insn (gen_cmpintqi (outlow));
+  emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
+
+  if (operands[0] != out)
+    emit_move_insn (operands[0], out);
+
+  DONE;
+})
+
+;; Produce a tri-state integer (-1, 0, 1) from condition codes.
+;; Two fresh QImode pseudos receive the GTU and LTU results of the flags;
+;; operand 0 is their difference (GTU - LTU).
+
+(define_expand "cmpintqi"
+  [(set (match_dup 1)
+        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (match_dup 2)
+        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (parallel [(set (match_operand:QI 0 "register_operand")
+                   (minus:QI (match_dup 1)
+                             (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  operands[1] = gen_reg_rtx (QImode);
+  operands[2] = gen_reg_rtx (QImode);
+})
+
+;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
+;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
+
+;; Expander for the string compare pattern without a zero-count test.
+;; Operands 4 and 5 are the two memory blocks, operand 2 the count
+;; register (used, then clobbered), operand 3 an SImode immediate, and
+;; operands 0 and 1 registers that are clobbered.  The preparation
+;; statement sets ix86_current_function_needs_cld.
+(define_expand "cmpstrnqi_nz_1"
+  [(parallel [(set (reg:CC FLAGS_REG)
+                   (compare:CC (match_operand 4 "memory_operand")
+                               (match_operand 5 "memory_operand")))
+              (use (match_operand 2 "register_operand"))
+              (use (match_operand:SI 3 "immediate_operand"))
+              (clobber (match_operand 0 "register_operand"))
+              (clobber (match_operand 1 "register_operand"))
+              (clobber (match_dup 2))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; repz cmpsb.  The block addresses (operands 4 and 5) are tied to the
+;; clobbered "=S" and "=D" registers and the count (operand 6) to the
+;; clobbered "=c" register.  Not available when CX, SI or DI is fixed.
+(define_insn "*cmpstrnqi_nz_1"
+  [(set (reg:CC FLAGS_REG)
+        (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
+                    (mem:BLK (match_operand:P 5 "register_operand" "1"))))
+   (use (match_operand:P 6 "register_operand" "2"))
+   (use (match_operand:SI 3 "immediate_operand" "i"))
+   (clobber (match_operand:P 0 "register_operand" "=S"))
+   (clobber (match_operand:P 1 "register_operand" "=D"))
+   (clobber (match_operand:P 2 "register_operand" "=c"))]
+  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^repz{%;} cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<P:MODE>mode == DImode")
+          (const_string "0")
+          (const_string "*")))
+   (set_attr "prefix_rep" "1")])
+
+;; The same, but the count is not known to be nonzero.
+
+;; Expander for the string compare pattern that tolerates a zero count:
+;; the flags are the block comparison when the count register (operand 2)
+;; is nonzero and (const_int 0) otherwise, and the incoming flags are
+;; used.  The preparation statement sets ix86_current_function_needs_cld.
+(define_expand "cmpstrnqi_1"
+  [(parallel [(set (reg:CC FLAGS_REG)
+                   (if_then_else:CC (ne (match_operand 2 "register_operand")
+                                        (const_int 0))
+                     (compare:CC (match_operand 4 "memory_operand")
+                                 (match_operand 5 "memory_operand"))
+                     (const_int 0)))
+              (use (match_operand:SI 3 "immediate_operand"))
+              (use (reg:CC FLAGS_REG))
+              (clobber (match_operand 0 "register_operand"))
+              (clobber (match_operand 1 "register_operand"))
+              (clobber (match_dup 2))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; repz cmpsb for the possibly-zero count.  Register tying is the same as
+;; in the nonzero-count insn: addresses to "=S"/"=D", count to "=c".
+(define_insn "*cmpstrnqi_1"
+  [(set (reg:CC FLAGS_REG)
+        (if_then_else:CC (ne (match_operand:P 6 "register_operand" "2")
+                             (const_int 0))
+          (compare:CC (mem:BLK (match_operand:P 4 "register_operand" "0"))
+                      (mem:BLK (match_operand:P 5 "register_operand" "1")))
+          (const_int 0)))
+   (use (match_operand:SI 3 "immediate_operand" "i"))
+   (use (reg:CC FLAGS_REG))
+   (clobber (match_operand:P 0 "register_operand" "=S"))
+   (clobber (match_operand:P 1 "register_operand" "=D"))
+   (clobber (match_operand:P 2 "register_operand" "=c"))]
+  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
+  "%^repz{%;} cmpsb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<P:MODE>mode == DImode")
+          (const_string "0")
+          (const_string "*")))
+   (set_attr "prefix_rep" "1")])
+
+;; strlen expander.  All the work is done by ix86_expand_strlen; the
+;; expansion FAILs when that helper returns zero.
+(define_expand "strlen<mode>"
+  [(set (match_operand:P 0 "register_operand")
+        (unspec:P [(match_operand:BLK 1 "general_operand")
+                   (match_operand:QI 2 "immediate_operand")
+                   (match_operand 3 "immediate_operand")]
+                  UNSPEC_SCAS))]
+  ""
+{
+  if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
+    DONE;
+  else
+    FAIL;
+})
+
+;; Wrapper parallel for the scan insn below: operand 0 is set from
+;; operand 2, and operand 1 and the flags are clobbered.  The preparation
+;; statement sets ix86_current_function_needs_cld.
+(define_expand "strlenqi_1"
+  [(parallel [(set (match_operand 0 "register_operand")
+                   (match_operand 2))
+              (clobber (match_operand 1 "register_operand"))
+              (clobber (reg:CC FLAGS_REG))])]
+  ""
+  "ix86_current_function_needs_cld = 1;")
+
+;; repnz scasb.  The result is the earlyclobber "=&c" register, with
+;; operand 4 tied to it; the scanned address (operand 5) is tied to the
+;; clobbered "=D" register; operand 2 ("a") is the QImode register
+;; operand.  Not available when AX, CX or DI is fixed.
+(define_insn "*strlenqi_1"
+  [(set (match_operand:P 0 "register_operand" "=&c")
+        (unspec:P [(mem:BLK (match_operand:P 5 "register_operand" "1"))
+                   (match_operand:QI 2 "register_operand" "a")
+                   (match_operand:P 3 "immediate_operand" "i")
+                   (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
+   (clobber (match_operand:P 1 "register_operand" "=D"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
+  "%^repnz{%;} scasb"
+  [(set_attr "type" "str")
+   (set_attr "mode" "QI")
+   (set (attr "prefix_rex")
+        (if_then_else
+          (match_test "<P:MODE>mode == DImode")
+          (const_string "0")
+          (const_string "*")))
+   (set_attr "prefix_rep" "1")])
+
+;; Peephole optimizations to clean up after cmpstrn*.  This should be
+;; handled in combine, but it is not currently up to the task.
+;; When used for their truth value, the cmpstrn* expanders generate
+;; code like this:
+;;
+;;   repz cmpsb
+;;   seta %al
+;;   setb %dl
+;;   cmpb %al, %dl
+;;   jcc  label
+;;
+;; The intermediate three instructions are unnecessary.
+
+;; This one handles cmpstrn*_nz_1...
+;; Matches the string compare parallel followed by the GTU set (operand
+;; 7), the LTU set (operand 8) and a compare of those two registers.
+;; When both registers are dead after the fourth insn, only the string
+;; compare parallel is kept.
+(define_peephole2
+  [(parallel[
+     (set (reg:CC FLAGS_REG)
+          (compare:CC (mem:BLK (match_operand 4 "register_operand"))
+                      (mem:BLK (match_operand 5 "register_operand"))))
+     (use (match_operand 6 "register_operand"))
+     (use (match_operand:SI 3 "immediate_operand"))
+     (clobber (match_operand 0 "register_operand"))
+     (clobber (match_operand 1 "register_operand"))
+     (clobber (match_operand 2 "register_operand"))])
+   (set (match_operand:QI 7 "register_operand")
+        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (match_operand:QI 8 "register_operand")
+        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (reg FLAGS_REG)
+        (compare (match_dup 7) (match_dup 8)))
+  ]
+  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+  [(parallel[
+     (set (reg:CC FLAGS_REG)
+          (compare:CC (mem:BLK (match_dup 4))
+                      (mem:BLK (match_dup 5))))
+     (use (match_dup 6))
+     (use (match_dup 3))
+     (clobber (match_dup 0))
+     (clobber (match_dup 1))
+     (clobber (match_dup 2))])])
+
+;; ...and this one handles cmpstrn*_1.
+;; Matches the zero-count-tolerant string compare parallel followed by
+;; the GTU set (operand 7), the LTU set (operand 8) and a compare of those
+;; two registers.  When both registers are dead after the fourth insn,
+;; only the string compare parallel is kept.
+(define_peephole2
+  [(parallel[
+     (set (reg:CC FLAGS_REG)
+          (if_then_else:CC (ne (match_operand 6 "register_operand")
+                               (const_int 0))
+            (compare:CC (mem:BLK (match_operand 4 "register_operand"))
+                        (mem:BLK (match_operand 5 "register_operand")))
+            (const_int 0)))
+     (use (match_operand:SI 3 "immediate_operand"))
+     (use (reg:CC FLAGS_REG))
+     (clobber (match_operand 0 "register_operand"))
+     (clobber (match_operand 1 "register_operand"))
+     (clobber (match_operand 2 "register_operand"))])
+   (set (match_operand:QI 7 "register_operand")
+        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (match_operand:QI 8 "register_operand")
+        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
+   (set (reg FLAGS_REG)
+        (compare (match_dup 7) (match_dup 8)))
+  ]
+  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
+  [(parallel[
+     (set (reg:CC FLAGS_REG)
+          (if_then_else:CC (ne (match_dup 6)
+                               (const_int 0))
+            (compare:CC (mem:BLK (match_dup 4))
+                        (mem:BLK (match_dup 5)))
+            (const_int 0)))
+     (use (match_dup 3))
+     (use (reg:CC FLAGS_REG))
+     (clobber (match_dup 0))
+     (clobber (match_dup 1))
+     (clobber (match_dup 2))])])
+
+;; Conditional move instructions.
+
+;; Integer conditional move expander.  All the work is done by
+;; ix86_expand_int_movcc; the expansion FAILs when it returns zero.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:SWIM 0 "register_operand")
+        (if_then_else:SWIM (match_operand 1 "comparison_operator")
+                           (match_operand:SWIM 2 "<general_operand>")
+                           (match_operand:SWIM 3 "<general_operand>")))]
+  ""
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
+
+;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
+;; the register first winds up with `sbbl $0,reg', which is also weird.
+;; So just document what we're doing explicitly.
+
+;; Expander with no condition and no preparation code: operand 0 becomes
+;; -1 when the carry-flag operator (operand 2, applied to the flags
+;; register operand 1) holds and 0 otherwise; the flags are clobbered.
+(define_expand "x86_mov<mode>cc_0_m1"
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+          (if_then_else:SWI48
+            (match_operator:SWI48 2 "ix86_carry_flag_operator"
+             [(match_operand 1 "flags_reg_operand")
+              (const_int 0)])
+            (const_int -1)
+            (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])])
+
+;; "sbb %0, %0" for the if_then_else (-1 / 0) form.
+(define_insn "*x86_mov<mode>cc_0_m1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (if_then_else:SWI48 (match_operator 1 "ix86_carry_flag_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+          (const_int -1)
+          (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{<imodesuffix>}\t%0, %0"
+  ; Since we don't have the proper number of operands for an alu insn,
+  ; fill in all the blanks.
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "0")])
+
+;; Same instruction, recognized when the value is written as a one-bit
+;; sign_extract of the carry-flag operator.
+(define_insn "*x86_mov<mode>cc_0_m1_se"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (sign_extract:SWI48 (match_operator 1 "ix86_carry_flag_operator"
+                             [(reg FLAGS_REG) (const_int 0)])
+                            (const_int 1)
+                            (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{<imodesuffix>}\t%0, %0"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "0")])
+
+;; Same instruction, recognized when the value is written as the negation
+;; of the carry-flag operator.
+(define_insn "*x86_mov<mode>cc_0_m1_neg"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (neg:SWI48 (match_operator 1 "ix86_carry_flag_operator"
+                    [(reg FLAGS_REG) (const_int 0)])))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{<imodesuffix>}\t%0, %0"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length_immediate" "0")])
+
+;; Integer cmov.  In each alternative one arm is tied to the destination
+;; ("0") and the other may be a register or memory; the second alternative
+;; uses the reversed condition (%c1).  Both arms being memory is rejected
+;; by the insn condition.
+(define_insn "*mov<mode>cc_noc"
+  [(set (match_operand:SWI248 0 "register_operand" "=r,r")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                               [(reg FLAGS_REG) (const_int 0)])
+          (match_operand:SWI248 2 "nonimmediate_operand" "rm,0")
+          (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))]
+  "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "icmov")
+   (set_attr "mode" "<MODE>")])
+
+;; Don't do conditional moves with memory inputs.  This splitter helps
+;; register starved x86_32 by forcing inputs into registers before reload.
+(define_split
+  [(set (match_operand:SWI248 0 "register_operand")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                               [(reg FLAGS_REG) (const_int 0)])
+          (match_operand:SWI248 2 "nonimmediate_operand")
+          (match_operand:SWI248 3 "nonimmediate_operand")))]
+  "!TARGET_64BIT && TARGET_CMOVE
+   && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && (MEM_P (operands[2]) || MEM_P (operands[3]))
+   && can_create_pseudo_p ()
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 0)
+        (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+  /* Replace each memory arm by a register holding its value.  */
+  if (MEM_P (operands[2]))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+  if (MEM_P (operands[3]))
+    operands[3] = force_reg (<MODE>mode, operands[3]);
+})
+
+;; QImode conditional move between registers.  Never output directly
+;; ("#"); the SWI12 define_split below rewrites it after reload.
+(define_insn "*movqicc_noc"
+  [(set (match_operand:QI 0 "register_operand" "=r,r")
+        (if_then_else:QI (match_operator 1 "ix86_comparison_operator"
+                           [(reg FLAGS_REG) (const_int 0)])
+          (match_operand:QI 2 "register_operand" "r,0")
+          (match_operand:QI 3 "register_operand" "0,r")))]
+  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+  "#"
+  [(set_attr "type" "icmov")
+   (set_attr "mode" "QI")])
+
+;; After reload, perform an SWI12 register conditional move in SImode
+;; by taking the SImode lowpart of all three register operands.
+(define_split
+  [(set (match_operand:SWI12 0 "register_operand")
+        (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator"
+                              [(reg FLAGS_REG) (const_int 0)])
+          (match_operand:SWI12 2 "register_operand")
+          (match_operand:SWI12 3 "register_operand")))]
+  "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL
+   && reload_completed"
+  [(set (match_dup 0)
+        (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+{
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[2] = gen_lowpart (SImode, operands[2]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
+})
+
+;; Don't do conditional moves with memory inputs
+;; (memory in the "else" arm): when a scratch register is available, load
+;; the memory operand into it first and use the scratch in the cmov.
+(define_peephole2
+  [(match_scratch:SWI248 2 "r")
+   (set (match_operand:SWI248 0 "register_operand")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                               [(reg FLAGS_REG) (const_int 0)])
+          (match_dup 0)
+          (match_operand:SWI248 3 "memory_operand")))]
+  "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+        (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))])
+
+;; Same transformation with the memory operand in the "then" arm.
+(define_peephole2
+  [(match_scratch:SWI248 2 "r")
+   (set (match_operand:SWI248 0 "register_operand")
+        (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator"
+                               [(reg FLAGS_REG) (const_int 0)])
+          (match_operand:SWI248 3 "memory_operand")
+          (match_dup 0)))]
+  "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE
+   && optimize_insn_for_speed_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+        (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))])
+
+;; Floating-point conditional move expander.  All the work is done by
+;; ix86_expand_fp_movcc; the expansion FAILs when it returns zero.
+(define_expand "mov<mode>cc"
+  [(set (match_operand:X87MODEF 0 "register_operand")
+        (if_then_else:X87MODEF
+          (match_operand 1 "comparison_operator")
+          (match_operand:X87MODEF 2 "register_operand")
+          (match_operand:X87MODEF 3 "register_operand")))]
+  "(TARGET_80387 && TARGET_CMOVE)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
+
+;; XFmode fcmov between x87 registers; the second alternative uses the
+;; reversed condition (%f1).
+(define_insn "*movxfcc_1"
+  [(set (match_operand:XF 0 "register_operand" "=f,f")
+        (if_then_else:XF (match_operator 1 "fcmov_comparison_operator"
+                                [(reg FLAGS_REG) (const_int 0)])
+                      (match_operand:XF 2 "register_operand" "f,0")
+                      (match_operand:XF 3 "register_operand" "0,f")))]
+  "TARGET_80387 && TARGET_CMOVE"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}"
+  [(set_attr "type" "fcmov")
+   (set_attr "mode" "XF")])
+
+;; DFmode conditional move.  Alternatives 0-1 use fcmov on x87 registers.
+;; Alternatives 2-3 (isa nox64) are output as "#" and handled by the
+;; define_split below.  Alternatives 4-5 (isa x64) use an integer cmov
+;; on general registers.
+(define_insn "*movdfcc_1"
+  [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r,r ,r")
+        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+                                [(reg FLAGS_REG) (const_int 0)])
+                      (match_operand:DF 2 "nonimmediate_operand"
+                                               "f ,0,rm,0 ,rm,0")
+                      (match_operand:DF 3 "nonimmediate_operand"
+                                               "0 ,f,0 ,rm,0, rm")))]
+  "TARGET_80387 && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}
+   #
+   #
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+  [(set_attr "isa" "*,*,nox64,nox64,x64,x64")
+   (set_attr "type" "fcmov,fcmov,multi,multi,icmov,icmov")
+   (set_attr "mode" "DF,DF,DI,DI,DI,DI")])
+
+;; 32-bit targets, after reload, destination not an FP register: split
+;; the DFmode conditional move into two SImode conditional moves.
+(define_split
+  [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand")
+        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
+                                [(reg FLAGS_REG) (const_int 0)])
+                      (match_operand:DF 2 "nonimmediate_operand")
+                      (match_operand:DF 3 "nonimmediate_operand")))]
+  "!TARGET_64BIT && reload_completed"
+  [(set (match_dup 2)
+        (if_then_else:SI (match_dup 1) (match_dup 4) (match_dup 5)))
+   (set (match_dup 3)
+        (if_then_else:SI (match_dup 1) (match_dup 6) (match_dup 7)))]
+{
+  /* Order matters: the two source operands are split first, into
+     operands 4/5 and 6/7.  The second call then overwrites operands 2
+     and 3 with the two halves of the destination.  */
+  split_double_mode (DImode, &operands[2], 2, &operands[4], &operands[6]);
+  split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
+})
+
+(define_insn "*movsfcc_1_387"
+  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
+        (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
+                                [(reg FLAGS_REG) (const_int 0)])
+                      (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+                      (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_80387 && TARGET_CMOVE
+   && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+  "@
+   fcmov%F1\t{%2, %0|%0, %2}
+   fcmov%f1\t{%3, %0|%0, %3}
+   cmov%O2%C1\t{%2, %0|%0, %2}
+   cmov%O2%c1\t{%3, %0|%0, %3}"
+ [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "SF,SF,SI,SI")]) + +;; Don't do conditional moves with memory inputs. This splitter helps +;; register starved x86_32 by forcing inputs into registers before reload. +(define_split + [(set (match_operand:MODEF 0 "register_operand") + (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:MODEF 2 "nonimmediate_operand") + (match_operand:MODEF 3 "nonimmediate_operand")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) + && can_create_pseudo_p () + && optimize_insn_for_speed_p ()" + [(set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + operands[2] = force_reg (<MODE>mode, operands[2]); + if (MEM_P (operands[3])) + operands[3] = force_reg (<MODE>mode, operands[3]); +}) + +;; Don't do conditional moves with memory inputs +(define_peephole2 + [(match_scratch:MODEF 2 "r") + (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_operand:MODEF 3 "memory_operand")))] + "(<MODE>mode != DFmode || TARGET_64BIT) + && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))]) + +(define_peephole2 + [(match_scratch:MODEF 2 "r") + (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:MODEF 3 "memory_operand") + (match_dup 0)))] + "(<MODE>mode != DFmode || TARGET_64BIT) + && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" 
+ [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))]) + +;; All moves in XOP pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. + +;; XOP conditional move +(define_insn "*xop_pcmov_<mode>" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "register_operand" "x") + (match_operand:MODEF 3 "register_operand" "x")))] + "TARGET_XOP" + "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). +;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "<code><mode>3" + [(set (match_operand:MODEF 0 "register_operand" "=x,v") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0,v") + (match_operand:MODEF 2 "nonimmediate_operand" "xm,vm")))] + "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" + "@ + <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} + v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "prefix" "orig,vex") + (set_attr "type" "sseadd") + (set_attr "mode" "<MODE>")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. 
+
+;; Iterate over the two IEEE min/max unspecs.
+(define_int_iterator IEEE_MAXMIN
+  [UNSPEC_IEEE_MAX
+   UNSPEC_IEEE_MIN])
+
+;; Map each unspec to the "max"/"min" stem used in the pattern name and
+;; in the output template.
+(define_int_attr ieee_maxmin
+  [(UNSPEC_IEEE_MAX "max")
+   (UNSPEC_IEEE_MIN "min")])
+
+;; Non-commutative SSE min/max: operand 1 has no "%" mark, so operand order
+;; is kept.  Alternative 0 is the two-operand (noavx) form, alternative 1
+;; the three-operand VEX form.
+(define_insn "*ieee_s<ieee_maxmin><mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+        (unspec:MODEF
+          [(match_operand:MODEF 1 "register_operand" "0,x")
+           (match_operand:MODEF 2 "nonimmediate_operand" "xm,xm")]
+          IEEE_MAXMIN))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "@
+   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
+   v<ieee_maxmin><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "type" "sseadd")
+   (set_attr "mode" "<MODE>")])
+
+;; Make two stack loads independent:
+;; fld aa fld aa
+;; fld %st(0) -> fld bb
+;; fmul bb fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+(define_peephole2
+  [(set (match_operand 0 "fp_register_operand")
+        (match_operand 1 "fp_register_operand"))
+   (set (match_dup 0)
+        (match_operator 2 "binary_fp_operator"
+          [(match_dup 0)
+           (match_operand 3 "memory_operand")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 0) (match_dup 4))]
+
+  ;; The % modifier is not operational anymore in peephole2's, so we have to
+  ;; swap the operands manually in the case of addition and multiplication.
+{
+  rtx op0, op1;
+
+  /* After the rewrite operand 0 holds the loaded memory value and operand 1
+     still holds the old register value, so a non-commutative operator must
+     take them in the order (operand 1, operand 0).  */
+  if (COMMUTATIVE_ARITH_P (operands[2]))
+    op0 = operands[0], op1 = operands[1];
+  else
+    op0 = operands[1], op1 = operands[0];
+
+  operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]),
+                                GET_MODE (operands[2]),
+                                op0, op1);
+})
+
+;; Conditional addition patterns
+;; The expander delegates to ix86_expand_int_addcc and FAILs when that
+;; returns false.
+(define_expand "add<mode>cc"
+  [(match_operand:SWI 0 "register_operand")
+   (match_operand 1 "ordered_comparison_operator")
+   (match_operand:SWI 2 "register_operand")
+   (match_operand:SWI 3 "const_int_operand")]
+  ""
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
+
+;; Misc patterns (?)
+
+;; This pattern exists to put a dependency on all ebp-based memory accesses.
+;; Otherwise there will be nothing to keep
+;;
+;; [(set (reg ebp) (reg esp))]
+;; [(set (reg esp) (plus (reg esp) (const_int -160000)))
+;; (clobber (eflags))]
+;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))]
+;;
+;; in proper program order.
+
+;; The output is chosen by the "type" attribute computed below: a plain mov
+;; when operand 2 is zero, add/sub when the destination is tied to operand 1
+;; and TARGET_OPT_AGU is off, and lea otherwise.
+(define_insn "pro_epilogue_adjust_stack_<mode>_add"
+  [(set (match_operand:P 0 "register_operand" "=r,r")
+        (plus:P (match_operand:P 1 "register_operand" "0,r")
+                (match_operand:P 2 "<nonmemory_operand>" "r<i>,l<i>")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  ""
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_IMOV:
+      return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+    case TYPE_ALU:
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+        return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+    default:
+      /* Print the whole PLUS source of the first SET as the lea address.  */
+      operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+      return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
+    }
+}
+  [(set (attr "type")
+        (cond [(and (eq_attr "alternative" "0")
+                    (not (match_test "TARGET_OPT_AGU")))
+                 (const_string "alu")
+               (match_operand:<MODE> 2 "const0_operand")
+                 (const_string "imov")
+              ]
+              (const_string "lea")))
+   (set (attr "length_immediate")
+        (cond [(eq_attr "type" "imov")
+                 (const_string "0")
+               (and (eq_attr "type" "alu")
+                    (match_operand 2 "const128_operand"))
+                 (const_string "1")
+              ]
+              (const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
+;; Register-register subtraction with the same flags and BLK-memory
+;; clobbers as the _add pattern above.
+(define_insn "pro_epilogue_adjust_stack_<mode>_sub"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (minus:P (match_operand:P 1 "register_operand" "0")
+                 (match_operand:P 2 "register_operand" "r")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+;; Emits a call to ___chkstk_ms; the operand is constrained to the "a"
+;; register for both input and output.
+(define_insn "allocate_stack_worker_probe_<mode>"
+  [(set (match_operand:P 0 "register_operand" "=a")
+        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
+                           UNSPECV_STACK_PROBE))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_target_stack_probe ()"
+  "call\t___chkstk_ms"
+  [(set_attr "type" "multi")
+   (set_attr "length" "5")])
+
+;; Dynamic stack allocation when stack probing is enabled.  Constant sizes
+;; below CHECK_STACK_LIMIT are subtracted directly; anything else goes
+;; through the probe worker first.  Operand 0 receives
+;; virtual_stack_dynamic_rtx.
+(define_expand "allocate_stack"
+  [(match_operand 0 "register_operand")
+   (match_operand 1 "general_operand")]
+  "ix86_target_stack_probe ()"
+{
+  rtx x;
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT 0
+#endif
+
+  if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
+      && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
+    x = operands[1];
+  else
+    {
+      rtx (*insn) (rtx, rtx);
+
+      x = copy_to_mode_reg (Pmode, operands[1]);
+
+      insn = (TARGET_64BIT
+              ? gen_allocate_stack_worker_probe_di
+              : gen_allocate_stack_worker_probe_si);
+
+      emit_insn (insn (x, x));
+    }
+
+  x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, x,
+                           stack_pointer_rtx, 0, OPTAB_DIRECT);
+
+  if (x != stack_pointer_rtx)
+    emit_move_insn (stack_pointer_rtx, x);
+
+  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+  DONE;
+})
+
+;; Use IOR for stack probes, this is shorter.
+;; (The probe is an OR of the memory operand with zero, in DImode or SImode
+;; according to the operand's mode.)
+(define_expand "probe_stack"
+  [(match_operand 0 "memory_operand")]
+  ""
+{
+  rtx (*gen_ior3) (rtx, rtx, rtx);
+
+  gen_ior3 = (GET_MODE (operands[0]) == DImode
+              ? gen_iordi3 : gen_iorsi3);
+
+  emit_insn (gen_ior3 (operands[0], operands[0], const0_rtx));
+  DONE;
+})
+
+;; Combined stack adjustment and probe; the assembly text is produced by
+;; output_adjust_stack_and_probe.
+(define_insn "adjust_stack_and_probe<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")]
+                           UNSPECV_PROBE_STACK_RANGE))
+   (set (reg:P SP_REG)
+        (minus:P (reg:P SP_REG) (match_operand:P 2 "const_int_operand" "n")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (mem:BLK (scratch)))]
+  ""
+  "* return output_adjust_stack_and_probe (operands[0]);"
+  [(set_attr "type" "multi")])
+
+;; Probe a range of stack; the assembly text is produced by
+;; output_probe_stack_range from operands 0 and 2.
+(define_insn "probe_stack_range<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec_volatile:P [(match_operand:P 1 "register_operand" "0")
+                            (match_operand:P 2 "const_int_operand" "n")]
+                           UNSPECV_PROBE_STACK_RANGE))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "* return output_probe_stack_range (operands[0], operands[2]);"
+  [(set_attr "type" "multi")])
+
+;; For 32-bit PIC code, set up pic_offset_table_rtx again at a setjmp
+;; receiver.  The Mach-O path uses a labelled set_got and then subtracts
+;; the machopic offset of that label.
+(define_expand "builtin_setjmp_receiver"
+  [(label_ref (match_operand 0))]
+  "!TARGET_64BIT && flag_pic"
+{
+#if TARGET_MACHO
+  if (TARGET_MACHO)
+    {
+      rtx xops[3];
+      rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+      rtx label_rtx = gen_label_rtx ();
+      emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+      xops[0] = xops[1] = picreg;
+      xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
+      ix86_expand_binary_operator (MINUS, SImode, xops);
+    }
+  else
+#endif
+    emit_insn (gen_set_got (pic_offset_table_rtx));
+  DONE;
+})
+
+;; Mach-O 32-bit PIC only.  Split after reload: when the function uses the
+;; PIC register, recompute it; otherwise emit only a deleted-insn note.
+(define_insn_and_split "nonlocal_goto_receiver"
+  [(unspec_volatile [(const_int 0)] UNSPECV_NLGR)]
+  "TARGET_MACHO && !TARGET_64BIT && flag_pic"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  if (crtl->uses_pic_offset_table)
+    {
+      rtx xops[3];
+      rtx label_rtx = gen_label_rtx ();
+      rtx tmp;
+
+      /* Get a new pic base. */
+      emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+      /* Correct this with the offset from the new to the old. */
+      xops[0] = xops[1] = pic_offset_table_rtx;
+      label_rtx = gen_rtx_LABEL_REF (SImode, label_rtx);
+      tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, label_rtx),
+                            UNSPEC_MACHOPIC_OFFSET);
+      xops[2] = gen_rtx_CONST (Pmode, tmp);
+      ix86_expand_binary_operator (MINUS, SImode, xops);
+    }
+  else
+    /* No pic reg restore needed. */
+    emit_note (NOTE_INSN_DELETED);
+
+  DONE;
+})
+
+;; Avoid redundant prefixes by splitting HImode arithmetic to SImode.
+;; Do not split instructions with mask registers.
+(define_split
+  [(set (match_operand 0 "general_reg_operand")
+        (match_operator 3 "promotable_binary_operator"
+          [(match_operand 1 "general_reg_operand")
+           (match_operand 2 "aligned_operand")]))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && ((GET_MODE (operands[0]) == HImode
+        && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
+            /* ??? next two lines just !satisfies_constraint_K (...) */
+            || !CONST_INT_P (operands[2])
+            || satisfies_constraint_K (operands[2])))
+       || (GET_MODE (operands[0]) == QImode
+           && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
+  [(parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  /* For ASHIFT operand 2 is left in its original mode.  */
+  if (GET_CODE (operands[3]) != ASHIFT)
+    operands[2] = gen_lowpart (SImode, operands[2]);
+  PUT_MODE (operands[3], SImode);
+})
+
+; Promote the QImode tests, as i386 has encoding of the AND
+; instruction with 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
+;; After reload, widen an HImode/QImode "and" that also sets the flags to
+;; SImode.  The immediate is masked to the original mode before use.
+(define_split
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 2 "compare_operator"
+          [(and (match_operand 3 "aligned_operand")
+                (match_operand 4 "const_int_operand"))
+           (const_int 0)]))
+   (set (match_operand 1 "register_operand")
+        (and (match_dup 3) (match_dup 4)))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && optimize_insn_for_speed_p ()
+   && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
+       || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
+   /* Ensure that the operand will remain sign-extended immediate. */
+   && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
+  [(parallel [(set (match_dup 0)
+                   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
+                                    (const_int 0)]))
+              (set (match_dup 1)
+                   (and:SI (match_dup 3) (match_dup 4)))])]
+{
+  operands[4]
+    = gen_int_mode (INTVAL (operands[4])
+                    & GET_MODE_MASK (GET_MODE (operands[1])), SImode);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+  operands[3] = gen_lowpart (SImode, operands[3]);
+})
+
+; Don't promote the QImode tests, as i386 doesn't have encoding of
+; the TEST instruction with 32-bit sign-extended immediate and thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
+; (This split therefore matches HImode operands only.)
+(define_split
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 1 "compare_operator"
+          [(and (match_operand:HI 2 "aligned_operand")
+                (match_operand:HI 3 "const_int_operand"))
+           (const_int 0)]))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && ! TARGET_FAST_PREFIX
+   && optimize_insn_for_speed_p ()
+   /* Ensure that the operand will remain sign-extended immediate. */
+   && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
+  [(set (match_dup 0)
+        (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+                         (const_int 0)]))]
+{
+  operands[3]
+    = gen_int_mode (INTVAL (operands[3])
+                    & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
+  operands[2] = gen_lowpart (SImode, operands[2]);
+})
+
+;; After reload, perform an HImode (or, when promoted, QImode) register
+;; negation in SImode.
+(define_split
+  [(set (match_operand 0 "register_operand")
+        (neg (match_operand 1 "register_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+           && (TARGET_PROMOTE_QImode
+               || optimize_insn_for_size_p ())))"
+  [(parallel [(set (match_dup 0)
+                   (neg:SI (match_dup 1)))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+})
+
+;; Do not split instructions with mask regs.
+;; (Same promotion as the neg split above, for "not" on general registers.)
+(define_split
+  [(set (match_operand 0 "general_reg_operand")
+        (not (match_operand 1 "general_reg_operand")))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+           && (TARGET_PROMOTE_QImode
+               || optimize_insn_for_size_p ())))"
+  [(set (match_dup 0)
+        (not:SI (match_dup 1)))]
+{
+  operands[0] = gen_lowpart (SImode, operands[0]);
+  operands[1] = gen_lowpart (SImode, operands[1]);
+})
+
+;; RTL Peephole optimizations, run before sched2. These primarily look to
+;; transform a complex memory operation into two memory to register operations.
+
+;; Don't push memory operands
+;; (load into scratch operand 2, then push the scratch; frame-related
+;; insns are left alone).
+(define_peephole2
+  [(set (match_operand:SWI 0 "push_operand")
+        (match_operand:SWI 1 "memory_operand"))
+   (match_scratch:SWI 2 "<r>")]
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))])
+
+;; We need to handle SFmode only, because DFmode and XFmode are split to
+;; SImode pushes.
+;; SFmode variant of the push-from-memory peephole, using a general
+;; register scratch.
+(define_peephole2
+  [(set (match_operand:SF 0 "push_operand")
+        (match_operand:SF 1 "memory_operand"))
+   (match_scratch:SF 2 "r")]
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))])
+
+;; Don't move an immediate directly to memory when the instruction
+;; gets too big, or if LCP stalls are a problem for 16-bit moves.
+;; Storing zero: clear the SImode view of the scratch with a
+;; flags-clobbering set (hence the dead-flags check), then store it.
+(define_peephole2
+  [(match_scratch:SWI124 1 "<r>")
+   (set (match_operand:SWI124 0 "memory_operand")
+        (const_int 0))]
+  "optimize_insn_for_speed_p ()
+   && ((<MODE>mode == HImode
+        && TARGET_LCP_STALL)
+       || (!TARGET_USE_MOV0
+           && TARGET_SPLIT_LONG_MOVES
+           && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 2) (const_int 0))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 1))]
+  "operands[2] = gen_lowpart (SImode, operands[1]);")
+
+;; General immediate: move it into the scratch, then store the scratch.
+(define_peephole2
+  [(match_scratch:SWI124 2 "<r>")
+   (set (match_operand:SWI124 0 "memory_operand")
+        (match_operand:SWI124 1 "immediate_operand"))]
+  "optimize_insn_for_speed_p ()
+   && ((<MODE>mode == HImode
+        && TARGET_LCP_STALL)
+       || (TARGET_SPLIT_LONG_MOVES
+           && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))])
+
+;; Don't compare memory with zero, load and use a test instead.
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 1 "compare_operator"
+          [(match_operand:SI 2 "memory_operand")
+           (const_int 0)]))
+   (match_scratch:SI 3 "r")]
+  "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
+  [(set (match_dup 3) (match_dup 2))
+   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))])
+
+;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
+;; Don't split NOTs with a displacement operand, because resulting XOR
+;; will not be pairable anyway.
+;;
+;; On AMD K6, NOT is vector decoded with memory operand that cannot be
+;; represented using a modRM byte. The XOR replacement is long decoded,
+;; so this split helps here as well.
+;;
+;; Note: Can't do this as a regular split because we can't get proper
+;; lifetime information then.
+
+;; The replacement XOR with -1 clobbers the flags, so the flags register
+;; must be dead at this insn.
+(define_peephole2
+  [(set (match_operand:SWI124 0 "nonimmediate_operand")
+        (not:SWI124 (match_operand:SWI124 1 "nonimmediate_operand")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+        && (!MEM_P (operands[0])
+            || !memory_displacement_operand (operands[0], <MODE>mode)))
+       || (TARGET_NOT_VECTORMODE
+           && long_memory_operand (operands[0], <MODE>mode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (xor:SWI124 (match_dup 1) (const_int -1)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Non pairable "test imm, reg" instructions can be translated to
+;; "and imm, reg" if reg dies. The "and" form is also shorter (one
+;; byte opcode instead of two, have a short form for byte operands),
+;; so do it for other CPUs as well. Given that the value was dead,
+;; this should not create any new dependencies. Pass on the sub-word
+;; versions if we're concerned about partial register stalls.
+
+;; SImode form; skipped for the AX register unless the immediate
+;; satisfies constraint K.
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 1 "compare_operator"
+          [(and:SI (match_operand:SI 2 "register_operand")
+                   (match_operand:SI 3 "immediate_operand"))
+           (const_int 0)]))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && (true_regnum (operands[2]) != AX_REG
+       || satisfies_constraint_K (operands[3]))
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel
+    [(set (match_dup 0)
+          (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+                           (const_int 0)]))
+     (set (match_dup 2)
+          (and:SI (match_dup 2) (match_dup 3)))])])
+
+;; We don't need to handle HImode case, because it will be promoted to SImode
+;; on ! TARGET_PARTIAL_REG_STALL
+
+;; QImode form; never applied to the AX register.
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 1 "compare_operator"
+          [(and:QI (match_operand:QI 2 "register_operand")
+                   (match_operand:QI 3 "immediate_operand"))
+           (const_int 0)]))]
+  "! TARGET_PARTIAL_REG_STALL
+   && ix86_match_ccmode (insn, CCNOmode)
+   && true_regnum (operands[2]) != AX_REG
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel
+    [(set (match_dup 0)
+          (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3))
+                           (const_int 0)]))
+     (set (match_dup 2)
+          (and:QI (match_dup 2) (match_dup 3)))])])
+
+;; Form operating on bits 8..15 of the register (8-bit zero_extract at
+;; position 8); never applied to the AX register.
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 1 "compare_operator"
+          [(and:SI
+             (zero_extract:SI
+               (match_operand 2 "ext_register_operand")
+               (const_int 8)
+               (const_int 8))
+             (match_operand 3 "const_int_operand"))
+           (const_int 0)]))]
+  "! TARGET_PARTIAL_REG_STALL
+   && ix86_match_ccmode (insn, CCNOmode)
+   && true_regnum (operands[2]) != AX_REG
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel [(set (match_dup 0)
+                   (match_op_dup 1
+                     [(and:SI
+                        (zero_extract:SI
+                          (match_dup 2)
+                          (const_int 8)
+                          (const_int 8))
+                        (match_dup 3))
+                      (const_int 0)]))
+              (set (zero_extract:SI (match_dup 2)
+                                    (const_int 8)
+                                    (const_int 8))
+                   (and:SI
+                     (zero_extract:SI
+                       (match_dup 2)
+                       (const_int 8)
+                       (const_int 8))
+                     (match_dup 3)))])])
+
+;; Don't do logical operations with memory inputs.
+;; Memory as the second operand: load it into scratch operand 2 first.
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_dup 0)
+                      (match_operand:SI 1 "memory_operand")]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
+  [(set (match_dup 2) (match_dup 1))
+   (parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Memory as the first operand; operand order is preserved in the result.
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "register_operand")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_operand:SI 1 "memory_operand")
+                      (match_dup 0)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
+  [(set (match_dup 2) (match_dup 1))
+   (parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address
+;; refers to the destination of the load!
+
+;; General-register form.  Operand 4 is the memory operand with every
+;; occurrence of operand 0 replaced by operand 1, because operand 0 is
+;; overwritten by the load before the address would otherwise be used.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand")
+        (match_operand:SI 1 "register_operand"))
+   (parallel [(set (match_dup 0)
+                   (match_operator:SI 3 "commutative_operator"
+                     [(match_dup 0)
+                      (match_operand:SI 2 "memory_operand")]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "REGNO (operands[0]) != REGNO (operands[1])
+   && GENERAL_REGNO_P (REGNO (operands[0]))
+   && GENERAL_REGNO_P (REGNO (operands[1]))"
+  [(set (match_dup 0) (match_dup 4))
+   (parallel [(set (match_dup 0)
+                   (match_op_dup 3 [(match_dup 0) (match_dup 1)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);")
+
+;; MMX/SSE register form (both registers of the same kind); no flags
+;; clobber is involved.
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+        (match_operand 1 "register_operand"))
+   (set (match_dup 0)
+        (match_operator 3 "commutative_operator"
+          [(match_dup 0)
+           (match_operand 2 "memory_operand")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])
+   && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1]))
+       || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))"
+  [(set (match_dup 0) (match_dup 2))
+   (set (match_dup 0)
+        (match_op_dup 3 [(match_dup 0) (match_dup 1)]))])
+
+; Don't do logical operations with memory outputs
+;
+; These two don't make sense for PPro/PII -- we're expanding a 4-uop
+; instruction into two 1-uop insns plus a 2-uop insn. That last has
+; the same decoder scheduling characteristics as the original.
+
+; Memory as the first operand: load into the scratch, operate, store back.
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "memory_operand")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_dup 0)
+                      (match_operand:SI 1 "nonmemory_operand")]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   /* Do not split stack checking probes. */
+   && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
+  [(set (match_dup 2) (match_dup 0))
+   (parallel [(set (match_dup 2)
+                   (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 2))])
+
+; Memory as the second operand; operand order is preserved.
+(define_peephole2
+  [(match_scratch:SI 2 "r")
+   (parallel [(set (match_operand:SI 0 "memory_operand")
+                   (match_operator:SI 3 "arith_or_logical_operator"
+                     [(match_operand:SI 1 "nonmemory_operand")
+                      (match_dup 0)]))
+              (clobber (reg:CC FLAGS_REG))])]
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   /* Do not split stack checking probes. */
+   && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
+  [(set (match_dup 2) (match_dup 0))
+   (parallel [(set (match_dup 2)
+                   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 0) (match_dup 2))])
+
+;; Attempt to use arith or logical operations with memory outputs with
+;; setting of flags.
+;; Four-insn form: load, operate, store back, compare the result with zero.
+;; The compare's flags destination is taken from the fourth matched insn.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+        (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_dup 0)
+                   (match_operator:SWI 3 "plusminuslogic_operator"
+                     [(match_dup 0)
+                      (match_operand:SWI 2 "<nonmemory_operand>")]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))
+   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || q_regs_operand (operands[2], QImode))
+   && ix86_match_ccmode (peep2_next_insn (3),
+                         (GET_CODE (operands[3]) == PLUS
+                          || GET_CODE (operands[3]) == MINUS)
+                         ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 5))
+              (set (match_dup 1) (match_op_dup 3 [(match_dup 1)
+                                                  (match_dup 2)]))])]
+{
+  operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+                                copy_rtx (operands[1]),
+                                copy_rtx (operands[2]));
+  operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+                                 operands[5], const0_rtx);
+})
+
+;; Three-insn form where memory is the second operand of the operation.
+;; MINUS is excluded because the replacement swaps the operand order.
+(define_peephole2
+  [(parallel [(set (match_operand:SWI 0 "register_operand")
+                   (match_operator:SWI 2 "plusminuslogic_operator"
+                     [(match_dup 0)
+                      (match_operand:SWI 1 "memory_operand")]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))
+   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && GET_CODE (operands[2]) != MINUS
+   && peep2_reg_dead_p (3, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && ix86_match_ccmode (peep2_next_insn (2),
+                         GET_CODE (operands[2]) == PLUS
+                         ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 3) (match_dup 4))
+              (set (match_dup 1) (match_op_dup 2 [(match_dup 1)
+                                                  (match_dup 0)]))])]
+{
+  operands[3] = SET_DEST (PATTERN (peep2_next_insn (2)));
+  operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), <MODE>mode,
+                                copy_rtx (operands[1]),
+                                copy_rtx (operands[0]));
+  operands[4] = gen_rtx_COMPARE (GET_MODE (operands[3]),
+                                 operands[4], const0_rtx);
+})
+
+;; QImode/HImode load and store around an SImode operation on the same
+;; hard register (operands 0 and 4 must have equal REGNO).  The operation
+;; is rebuilt in <MODE>mode directly on memory.
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+        (match_operand:SWI12 1 "memory_operand"))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                   (match_operator:SI 3 "plusminuslogic_operator"
+                     [(match_dup 4)
+                      (match_operand:SI 2 "nonmemory_operand")]))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 0))
+   (set (reg FLAGS_REG) (compare (match_dup 0) (const_int 0)))]
+  "(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   && REG_P (operands[0]) && REG_P (operands[4])
+   && REGNO (operands[0]) == REGNO (operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || q_regs_operand (operands[2], SImode))
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && ix86_match_ccmode (peep2_next_insn (3),
+                         (GET_CODE (operands[3]) == PLUS
+                          || GET_CODE (operands[3]) == MINUS)
+                         ? CCGOCmode : CCNOmode)"
+  [(parallel [(set (match_dup 4) (match_dup 5))
+              (set (match_dup 1) (match_dup 6))])]
+{
+  operands[2] = gen_lowpart (<MODE>mode, operands[2]);
+  operands[4] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+                                copy_rtx (operands[1]), operands[2]);
+  operands[5] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+                                 operands[5], const0_rtx);
+  operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[3]), <MODE>mode,
+                                copy_rtx (operands[1]),
+                                copy_rtx (operands[2]));
+})
+
+;; Attempt to always use XOR for zeroing registers.
+(define_peephole2
+  [(set (match_operand 0 "register_operand")
+        (match_operand 1 "const0_operand"))]
+  ;; Applies only to general registers no wider than a word, and only when
+  ;; the flags register is dead, because the replacement clobbers it.
+  "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+   && GENERAL_REG_P (operands[0])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int 0))
+              (clobber (reg:CC FLAGS_REG))])]
+  ;; The replacement zeroes the register in word_mode.
+  "operands[0] = gen_lowpart (word_mode, operands[0]);")
+
+;; Same transformation for a QImode or HImode strict_low_part store of
+;; zero: switch to the form that clobbers the (dead) flags register.
+(define_peephole2
+  [(set (strict_low_part (match_operand 0 "register_operand"))
+        (const_int 0))]
+  "(GET_MODE (operands[0]) == QImode
+    || GET_MODE (operands[0]) == HImode)
+   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; For HI, SI and DI modes, or $-1,reg is smaller than mov $-1,reg.
+;; Used when optimizing for size or when TARGET_MOVE_M1_VIA_OR is set, and
+;; only when the flags register is dead.
+(define_peephole2
+  [(set (match_operand:SWI248 0 "register_operand")
+        (const_int -1))]
+  "(optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int -1))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  /* A destination narrower than SImode is widened to SImode.  */
+  if (<MODE_SIZE> < GET_MODE_SIZE (SImode))
+    operands[0] = gen_lowpart (SImode, operands[0]);
+})
+
+;; Attempt to convert simple lea to add/shift.
+;; These can be created by move expanders.
+;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
+;; relevant lea instructions were already split.
+
+;; reg0 = reg0 + op1 without a flags clobber becomes the same addition
+;; with a flags clobber, provided the flags register is dead.
+(define_peephole2
+  [(set (match_operand:SWI48 0 "register_operand")
+        (plus:SWI48 (match_dup 0)
+                    (match_operand:SWI48 1 "<nonmemory_operand>")))]
+  "!TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Same with the operands of the plus swapped; the replacement puts the
+;; destination register first.
+(define_peephole2
+  [(set (match_operand:SWI48 0 "register_operand")
+        (plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
+                    (match_dup 0)))]
+  "!TARGET_OPT_AGU
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Zero-extended SImode addition on 64-bit targets, where the DImode
+;; destination and the SImode register source are the same hard register.
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (plus:SI (match_operand:SI 1 "register_operand")
+                   (match_operand:SI 2 "nonmemory_operand"))))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; As above with the register as the second operand of the plus; the
+;; replacement swaps the operands so the register comes first.
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (plus:SI (match_operand:SI 1 "nonmemory_operand")
+                   (match_operand:SI 2 "register_operand"))))]
+  "TARGET_64BIT && !TARGET_OPT_AGU
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Multiplication of a register by a power of two becomes a left shift by
+;; the base-2 logarithm of the multiplier (flags must be dead).
+(define_peephole2
+  [(set (match_operand:SWI48 0 "register_operand")
+        (mult:SWI48 (match_dup 0)
+                    (match_operand:SWI48 1 "const_int_operand")))]
+  "exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1)))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+;; Zero-extended SImode form of the multiply-to-shift conversion for
+;; 64-bit targets.
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (zero_extend:DI
+          (mult:SI (match_operand:SI 1 "register_operand")
+                   (match_operand:SI 2 "const_int_operand"))))]
+  "TARGET_64BIT
+   && exact_log2 (INTVAL (operands[2])) >= 0
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+                   (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
+              (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+;; The ESP adjustments can be done by the push and pop instructions.  The
+;; resulting code is shorter, since push is only 1 byte, while
+;; add imm, %esp is 3 bytes.  On many CPUs it is also faster, since special
+;; hardware to avoid esp dependencies is present.
+
+;; While some of these conversions may be done using splitters, we use
+;; peepholes in order to allow the combine_stack_adjustments pass to see
+;; nonobfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.  In order to keep verify_flow_info happy we
+;; have two choices:
+;; - use a scratch register and clobber it in order to avoid dependencies
+;; - use an already live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most
+;; likely result in fewer dependencies.  At the point of esp adjustments it
+;; is very likely that call-clobbered registers are dead.  We may want to
+;; use the base pointer as an alternative later, when no register is
+;; available.
+
+;; Stack pointer decrement by one word (with a memory clobber) becomes a
+;; single push of a clobbered scratch register.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))
+              (clobber (mem:BLK (scratch)))])]
+  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)"
+  [(clobber (match_dup 1))
+   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+              (clobber (mem:BLK (scratch)))])])
+
+;; Stack pointer decrement by two words (with a memory clobber) becomes
+;; two pushes of the same scratch register; the memory clobber is kept on
+;; the second push.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))
+              (clobber (mem:BLK (scratch)))])]
+  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)"
+  [(clobber (match_dup 1))
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (parallel [(set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+              (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+;; These two patterns match the adjustment without the memory clobber:
+;; one word becomes one push, two words become two pushes.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_SINGLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -GET_MODE_SIZE (word_mode)"
+  [(clobber (match_dup 1))
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
+
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "(TARGET_DOUBLE_PUSH || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == -2*GET_MODE_SIZE (word_mode)"
+  [(clobber (match_dup 1))
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))
+   (set (mem:W (pre_dec:P (reg:P SP_REG))) (match_dup 1))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))
+              (clobber (mem:BLK (scratch)))])]
+  ;; One-word increment with a memory clobber: a single pop into the
+  ;; scratch register.
+  "(TARGET_SINGLE_POP || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
+  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+              (clobber (mem:BLK (scratch)))])])
+
+;; Two pops case is tricky, since pop causes dependency
+;; on destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (match_scratch:W 2 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))
+              (clobber (mem:BLK (scratch)))])]
+  "(TARGET_DOUBLE_POP || optimize_insn_for_size_p ())
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+              (clobber (mem:BLK (scratch)))])
+   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
+
+;; Variant that needs only one scratch register and pops into it twice;
+;; enabled only when optimizing for size.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))
+              (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p ()
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(parallel [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+              (clobber (mem:BLK (scratch)))])
+   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
+
+;; Convert esp additions to pop.
+;; This pattern matches the one-word adjustment without the memory clobber
+;; and has no target-tuning or size condition.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[0]) == GET_MODE_SIZE (word_mode)"
+  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
+
+;; Two pops case is tricky, since pop causes dependency
+;; on destination register.  We use two registers if available.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (match_scratch:W 2 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+   (set (match_dup 2) (mem:W (post_inc:P (reg:P SP_REG))))])
+
+;; Single-scratch variant that pops into the same register twice; enabled
+;; only when optimizing for size.
+(define_peephole2
+  [(match_scratch:W 1 "r")
+   (parallel [(set (reg:P SP_REG)
+                   (plus:P (reg:P SP_REG)
+                           (match_operand:P 0 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p ()
+   && INTVAL (operands[0]) == 2*GET_MODE_SIZE (word_mode)"
+  [(set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))
+   (set (match_dup 1) (mem:W (post_inc:P (reg:P SP_REG))))])
+
+;; Convert compares with 1 to shorter inc/dec operations when CF is not
+;; required and register dies.  Similarly for 128 to -128.
+;; The replacement keeps the compare and adds a clobber of the compared
+;; register (operand 2), which the condition requires to be dead.
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand")
+        (match_operator 1 "compare_operator"
+          [(match_operand 2 "register_operand")
+           (match_operand 3 "const_int_operand")]))]
+  "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_insn_for_size_p ())
+     && incdec_operand (operands[3], GET_MODE (operands[3])))
+    || (!TARGET_FUSE_CMP_AND_BRANCH
+        && INTVAL (operands[3]) == 128))
+   && ix86_match_ccmode (insn, CCGCmode)
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel [(set (match_dup 0)
+                   (match_op_dup 1 [(match_dup 2) (match_dup 3)]))
+              (clobber (match_dup 2))])])
+
+;; Convert imul by three, five and nine into lea
+;; Register source: reg1 * N is rewritten as reg1 * (N - 1) + reg1.
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+          (mult:SWI48 (match_operand:SWI48 1 "register_operand")
+                      (match_operand:SWI48 2 "const359_operand")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_PARTIAL_REG_STALL
+   || <MODE>mode == SImode
+   || optimize_function_for_size_p (cfun)"
+  [(set (match_dup 0)
+        (plus:SWI48 (mult:SWI48 (match_dup 1) (match_dup 2))
+                    (match_dup 1)))]
+  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
+
+;; Register-or-memory source, speed optimization only: first copy the
+;; source into the destination, then compute dest * (N - 1) + dest.
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SWI48 0 "register_operand")
+          (mult:SWI48 (match_operand:SWI48 1 "nonimmediate_operand")
+                      (match_operand:SWI48 2 "const359_operand")))
+     (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_speed_p ()
+   && (!TARGET_PARTIAL_REG_STALL || <MODE>mode == SImode)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 0)
+        (plus:SWI48 (mult:SWI48 (match_dup 0) (match_dup 2))
+                    (match_dup 0)))]
+  "operands[2] = GEN_INT (INTVAL (operands[2]) - 1);")
+
+;; imul $32bit_imm, mem, reg is vector decoded, while
+;; imul $32bit_imm, reg, reg is direct decoded.
+;; The memory source is first loaded into a scratch register; immediates
+;; satisfying constraint K are left alone.
+(define_peephole2
+  [(match_scratch:SWI48 3 "r")
+   (parallel [(set (match_operand:SWI48 0 "register_operand")
+                   (mult:SWI48 (match_operand:SWI48 1 "memory_operand")
+                               (match_operand:SWI48 2 "immediate_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0) (mult:SWI48 (match_dup 3) (match_dup 2)))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; Zero-extended SImode form of the same conversion for 64-bit targets.
+(define_peephole2
+  [(match_scratch:SI 3 "r")
+   (parallel [(set (match_operand:DI 0 "register_operand")
+                   (zero_extend:DI
+                     (mult:SI (match_operand:SI 1 "memory_operand")
+                              (match_operand:SI 2 "immediate_operand"))))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT
+   && TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
+   && !satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 0)
+                   (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
+              (clobber (reg:CC FLAGS_REG))])])
+
+;; imul $8/16bit_imm, regmem, reg is vector decoded.
+;; Convert it into imul reg, reg
+;; It would be better to force assembler to encode instruction using long
+;; immediate, but there is apparently no way to do so.
+(define_peephole2
+  [(parallel [(set (match_operand:SWI248 0 "register_operand")
+                   (mult:SWI248
+                    (match_operand:SWI248 1 "nonimmediate_operand")
+                    (match_operand:SWI248 2 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])
+   (match_scratch:SWI248 3 "r")]
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
+   && satisfies_constraint_K (operands[2])"
+  [(set (match_dup 3) (match_dup 2))
+   (parallel [(set (match_dup 0) (mult:SWI248 (match_dup 0) (match_dup 3)))
+              (clobber (reg:CC FLAGS_REG))])]
+{
+  /* The replacement multiplies operand 0 by the scratch, so copy the
+     original source into operand 0 first unless it is already there.  */
+  if (!rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+})
+
+;; After splitting up read-modify operations, array accesses with memory
+;; operands might end up in form:
+;;   sall    $2, %eax
+;;   movl    4(%esp), %edx
+;;   addl    %edx, %eax
+;; instead of pre-splitting:
+;;   sall    $2, %eax
+;;   addl    4(%esp), %eax
+;; Turn it into:
+;;   movl    4(%esp), %edx
+;;   leal    (%edx,%eax,4), %eax
+
+(define_peephole2
+  [(match_scratch:W 5 "r")
+   (parallel [(set (match_operand 0 "register_operand")
+                   (ashift (match_operand 1 "register_operand")
+                           (match_operand 2 "const_int_operand")))
+              (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_operand 3 "register_operand")
+                   (plus (match_dup 0)
+                         (match_operand 4 "x86_64_general_operand")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "IN_RANGE (INTVAL (operands[2]), 1, 3)
+   /* Validate MODE for lea.  */
+   && ((!TARGET_PARTIAL_REG_STALL
+        && (GET_MODE (operands[0]) == QImode
+            || GET_MODE (operands[0]) == HImode))
+       || GET_MODE (operands[0]) == SImode
+       || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+   && (rtx_equal_p (operands[0], operands[3])
+       || peep2_reg_dead_p (2, operands[0]))
+   /* We reorder load and the shift.  */
+   && !reg_overlap_mentioned_p (operands[0], operands[4])"
+  [(set (match_dup 5) (match_dup 4))
+   (set (match_dup 0) (match_dup 1))]
+{
+  enum machine_mode op1mode = GET_MODE (operands[1]);
+  enum machine_mode mode = op1mode == DImode ? DImode : SImode;
+  /* A shift count of 1..3 (checked in the condition) gives scale 2, 4, 8.  */
+  int scale = 1 << INTVAL (operands[2]);
+  rtx index = gen_lowpart (word_mode, operands[1]);
+  rtx base = gen_lowpart (word_mode, operands[5]);
+  rtx dest = gen_lowpart (mode, operands[3]);
+
+  /* Operand 1 becomes the address expression base + index * scale built
+     in word_mode; operand 5 is the scratch that receives the addend.  */
+  operands[1] = gen_rtx_PLUS (word_mode, base,
+                              gen_rtx_MULT (word_mode, index, GEN_INT (scale)));
+  operands[5] = base;
+  /* Wrap in SUBREGs when the working modes differ from word_mode.  */
+  if (mode != word_mode)
+    operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+  if (op1mode != word_mode)
+    operands[5] = gen_rtx_SUBREG (op1mode, operands[5], 0);
+  operands[0] = dest;
+})
+
+;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
+;; That, however, is usually mapped by the OS to SIGSEGV, which is often
+;; caught for use by garbage collectors and the like.  Using an insn that
+;; maps to SIGILL makes it more likely the program will rightfully die.
+;; Keeping with tradition, "6" is in honor of #UD.
+(define_insn "trap"
+  [(trap_if (const_int 1) (const_int 6))]
+  ""
+{
+  /* Emit the mnemonic when HAVE_AS_IX86_UD2 is defined, otherwise emit
+     the two bytes as data.  */
+#ifdef HAVE_AS_IX86_UD2
+  return "ud2";
+#else
+  return ASM_SHORT "0x0b0f";
+#endif
+}
+  [(set_attr "length" "2")])
+
+;; Expander for prefetch.  Operand 1 is read as the write flag (nonzero
+;; means write) and operand 2 as the locality, asserted to be in 0..3.
+;; The expander rewrites operand 1 or operand 2 to select one of the
+;; prefetch insns that follow.
+(define_expand "prefetch"
+  [(prefetch (match_operand 0 "address_operand")
+             (match_operand:SI 1 "const_int_operand")
+             (match_operand:SI 2 "const_int_operand"))]
+  "TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_PREFETCHWT1"
+{
+  bool write = INTVAL (operands[1]) != 0;
+  int locality = INTVAL (operands[2]);
+
+  gcc_assert (IN_RANGE (locality, 0, 3));
+
+  /* Use 3dNOW prefetch in case we are asking for write prefetch not
+     supported by SSE counterpart or the SSE prefetch is not available
+     (K6 machines).  Otherwise use SSE prefetch as it allows specifying
+     of locality.  */
+  if (TARGET_PREFETCHWT1 && write && locality <= 2)
+    operands[2] = const2_rtx;
+  else if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE))
+    operands[2] = GEN_INT (3);
+  else
+    operands[1] = const0_rtx;
+})
+
+;; Prefetch with the write flag fixed at 0; the locality operand (0..3)
+;; indexes the table prefetchnta, prefetcht2, prefetcht1, prefetcht0.
+(define_insn "*prefetch_sse"
+  [(prefetch (match_operand 0 "address_operand" "p")
+             (const_int 0)
+             (match_operand:SI 1 "const_int_operand"))]
+  "TARGET_PREFETCH_SSE"
+{
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
+
+  int locality = INTVAL (operands[1]);
+  gcc_assert (IN_RANGE (locality, 0, 3));
+
+  return patterns[locality];
+}
+  [(set_attr "type" "sse")
+   (set_attr "atom_sse_attr" "prefetch")
+   (set (attr "length_address")
+        (symbol_ref "memory_address_length (operands[0], false)"))
+   (set_attr "memory" "none")])
+
+;; Prefetch with the locality fixed at 3; emits "prefetch" when the write
+;; flag is 0 and "prefetchw" otherwise.
+(define_insn "*prefetch_3dnow"
+  [(prefetch (match_operand 0 "address_operand" "p")
+             (match_operand:SI 1 "const_int_operand" "n")
+             (const_int 3))]
+  "TARGET_PRFCHW"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")
+   (set (attr "length_address")
+        (symbol_ref "memory_address_length (operands[0], false)"))
+   (set_attr "memory" "none")])
+
+;; Prefetch with write flag 1 and locality 2, emitted as prefetchwt1.
+;; NOTE(review): the ';' after the output template below starts an md
+;; comment, so it appears to be harmless but unintended -- confirm.
+(define_insn "*prefetch_prefetchwt1_<mode>"
+  [(prefetch (match_operand:P 0 "address_operand" "p")
+             (const_int 1)
+             (const_int 2))]
+  "TARGET_PREFETCHWT1"
+  "prefetchwt1\t%a0";
+  [(set_attr "type" "sse")
+   (set (attr "length_address")
+        (symbol_ref "memory_address_length (operands[0], false)"))
+   (set_attr "memory" "none")])
+
+;; Expander that dispatches to the DImode or SImode stack-protector "set"
+;; insn depending on TARGET_LP64.  When TARGET_THREAD_SSP_OFFSET is
+;; defined, operand 1 is replaced by that constant and the TLS insn is
+;; used instead.
+(define_expand "stack_protect_set"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")]
+  "TARGET_SSP_TLS_GUARD"
+{
+  rtx (*insn)(rtx, rtx);
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+  operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
+  insn = (TARGET_LP64
+          ? gen_stack_tls_protect_set_di
+          : gen_stack_tls_protect_set_si);
+#else
+  insn = (TARGET_LP64
+          ? gen_stack_protect_set_di
+          : gen_stack_protect_set_si);
+#endif
+
+  emit_insn (insn (operands[0], operands[1]));
+  DONE;
+})
+
+;; Copy memory operand 1 to memory operand 0 through an early-clobber
+;; scratch register, then zero the scratch with xor.
+(define_insn "stack_protect_set_<mode>"
+  [(set (match_operand:PTR 0 "memory_operand" "=m")
+        (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
+                    UNSPEC_SP_SET))
+   (set (match_scratch:PTR 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_SSP_TLS_GUARD"
+  "mov{<imodesuffix>}\t{%1, %2|%2, %1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+  [(set_attr "type" "multi")])
+
+;; TLS variant: operand 1 is a constant offset, printed after the "%@:"
+;; prefix in the first mov.
+(define_insn "stack_tls_protect_set_<mode>"
+  [(set (match_operand:PTR 0 "memory_operand" "=m")
+        (unspec:PTR [(match_operand:PTR 1 "const_int_operand" "i")]
+                    UNSPEC_SP_TLS_SET))
+   (set (match_scratch:PTR 2 "=&r") (const_int 0))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "mov{<imodesuffix>}\t{%@:%P1, %2|%2, <iptrsize> PTR %@:%P1}\;mov{<imodesuffix>}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+  [(set_attr "type" "multi")])
+
+;; Expander for the stack-protector check: emits the mode-appropriate test
+;; insn, which sets the CCZ flags, followed by a conditional branch to
+;; operand 2 taken when the flags compare equal to zero.
+(define_expand "stack_protect_test"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "memory_operand")
+   (match_operand 2)]
+  "TARGET_SSP_TLS_GUARD"
+{
+  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+
+  rtx (*insn)(rtx, rtx, rtx);
+
+#ifdef TARGET_THREAD_SSP_OFFSET
+  operands[1] = GEN_INT (TARGET_THREAD_SSP_OFFSET);
+  insn = (TARGET_LP64
+          ? gen_stack_tls_protect_test_di
+          : gen_stack_tls_protect_test_si);
+#else
+  insn = (TARGET_LP64
+          ? gen_stack_protect_test_di
+          : gen_stack_protect_test_si);
+#endif
+
+  emit_insn (insn (flags, operands[0], operands[1]));
+
+  emit_jump_insn (gen_cbranchcc4 (gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+                                  flags, const0_rtx, operands[2]));
+  DONE;
+})
+
+;; Load memory operand 1 into a scratch register and xor it with memory
+;; operand 2; the insn's result is the flags register (operand 0).
+(define_insn "stack_protect_test_<mode>"
+  [(set (match_operand:CCZ 0 "flags_reg_operand")
+        (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
+                     (match_operand:PTR 2 "memory_operand" "m")]
+                    UNSPEC_SP_TEST))
+   (clobber (match_scratch:PTR 3 "=&r"))]
+  "TARGET_SSP_TLS_GUARD"
+  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%2, %3|%3, %2}"
+  [(set_attr "type" "multi")])
+
+;; TLS variant: operand 2 is a constant offset, printed after the "%@:"
+;; prefix in the xor.
+(define_insn "stack_tls_protect_test_<mode>"
+  [(set (match_operand:CCZ 0 "flags_reg_operand")
+        (unspec:CCZ [(match_operand:PTR 1 "memory_operand" "m")
+                     (match_operand:PTR 2 "const_int_operand" "i")]
+                    UNSPEC_SP_TLS_TEST))
+   (clobber (match_scratch:PTR 3 "=r"))]
+  ""
+  "mov{<imodesuffix>}\t{%1, %3|%3, %1}\;xor{<imodesuffix>}\t{%@:%P2, %3|%3, <iptrsize> PTR %@:%P2}"
+  [(set_attr "type" "multi")])
+
+;; crc32 with an SImode accumulator (operand 1 tied to the destination)
+;; and a QI/HI/SImode source.  The attributes request a data16 prefix for
+;; an HImode source and a REX prefix for an ext_QIreg_operand source.
+(define_insn "sse4_2_crc32<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI
+          [(match_operand:SI 1 "register_operand" "0")
+           (match_operand:SWI124 2 "nonimmediate_operand" "<r>m")]
+          UNSPEC_CRC32))]
+  "TARGET_SSE4_2 || TARGET_CRC32"
+  "crc32{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix_extra" "1")
+   (set (attr "prefix_data16")
+     (if_then_else (match_operand:HI 2)
+       (const_string "1")
+       (const_string "*")))
+   (set (attr "prefix_rex")
+     (if_then_else (match_operand:QI 2 "ext_QIreg_operand")
+       (const_string "1")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; DImode crc32, available only on 64-bit targets.
+(define_insn "sse4_2_crc32di"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (unspec:DI
+          [(match_operand:DI 1 "register_operand" "0")
+           (match_operand:DI 2 "nonimmediate_operand" "rm")]
+          UNSPEC_CRC32))]
+  "TARGET_64BIT && (TARGET_SSE4_2 || TARGET_CRC32)"
+  "crc32{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "DI")])
+
+;; rdpmc / rdtsc / rdtscp.  The 32-bit patterns have one DImode result
+;; with constraint "A"; the *_rex64 patterns have two DImode results with
+;; constraints "a" and "d".  rdpmc takes its input with constraint "c";
+;; rdtscp has an additional SImode result with constraint "c".
+(define_insn "rdpmc"
+  [(set (match_operand:DI 0 "register_operand" "=A")
+        (unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
+                            UNSPECV_RDPMC))]
+  "!TARGET_64BIT"
+  "rdpmc"
+  [(set_attr "type" "other")
+   (set_attr "length" "2")])
+
+(define_insn "rdpmc_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+        (unspec_volatile:DI [(match_operand:SI 2 "register_operand" "c")]
+                            UNSPECV_RDPMC))
+   (set (match_operand:DI 1 "register_operand" "=d")
+        (unspec_volatile:DI [(match_dup 2)] UNSPECV_RDPMC))]
+  "TARGET_64BIT"
+  "rdpmc"
+  [(set_attr "type" "other")
+   (set_attr "length" "2")])
+
+(define_insn "rdtsc"
+  [(set (match_operand:DI 0 "register_operand" "=A")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
+  "!TARGET_64BIT"
+  "rdtsc"
+  [(set_attr "type" "other")
+   (set_attr "length" "2")])
+
+(define_insn "rdtsc_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))
+   (set (match_operand:DI 1 "register_operand" "=d")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSC))]
+  "TARGET_64BIT"
+  "rdtsc"
+  [(set_attr "type" "other")
+   (set_attr "length" "2")])
+
+(define_insn "rdtscp"
+  [(set (match_operand:DI 0 "register_operand" "=A")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
+   (set (match_operand:SI 1 "register_operand" "=c")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
+  "!TARGET_64BIT"
+  "rdtscp"
+  [(set_attr "type" "other")
+   (set_attr "length" "3")])
+
+(define_insn "rdtscp_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
+   (set (match_operand:DI 1 "register_operand" "=d")
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_RDTSCP))
+   (set (match_operand:SI 2 "register_operand" "=c")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_RDTSCP))]
+  "TARGET_64BIT"
+  "rdtscp"
+  [(set_attr "type" "other")
+   (set_attr "length" "3")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; FXSR, XSAVE and XSAVEOPT instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; The save patterns store to a BLKmode memory operand and the restore
+;; patterns read one.  The "length" attribute is the default address
+;; length plus 3, or plus 4 for the 64-suffixed forms.
+(define_insn "fxsave"
+  [(set (match_operand:BLK 0 "memory_operand" "=m")
+        (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE))]
+  "TARGET_FXSR"
+  "fxsave\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+
+(define_insn "fxsave64"
+  [(set (match_operand:BLK 0 "memory_operand" "=m")
+        (unspec_volatile:BLK [(const_int 0)] UNSPECV_FXSAVE64))]
+  "TARGET_64BIT && TARGET_FXSR"
+  "fxsave64\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
+
+(define_insn "fxrstor"
+  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
+                    UNSPECV_FXRSTOR)]
+  "TARGET_FXSR"
+  "fxrstor\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "load")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+
+(define_insn "fxrstor64"
+  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
+                    UNSPECV_FXRSTOR64)]
+  "TARGET_64BIT && TARGET_FXSR"
+  "fxrstor64\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "load")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
+
+;; Iterators over the xsave/xsaveopt unspecs; the xsaveopt entries are
+;; enabled only under TARGET_XSAVEOPT.  The "xsave" attribute maps each
+;; unspec to its mnemonic, which also forms the insn names below.
+(define_int_iterator ANY_XSAVE
+        [UNSPECV_XSAVE
+         (UNSPECV_XSAVEOPT "TARGET_XSAVEOPT")])
+
+(define_int_iterator ANY_XSAVE64
+        [UNSPECV_XSAVE64
+         (UNSPECV_XSAVEOPT64 "TARGET_XSAVEOPT")])
+
+(define_int_attr xsave
+        [(UNSPECV_XSAVE "xsave")
+         (UNSPECV_XSAVE64 "xsave64")
+         (UNSPECV_XSAVEOPT "xsaveopt")
+         (UNSPECV_XSAVEOPT64 "xsaveopt64")])
+
+;; 32-bit form: the DImode register input uses constraint "A".
+(define_insn "<xsave>"
+  [(set (match_operand:BLK 0 "memory_operand" "=m")
+        (unspec_volatile:BLK
+         [(match_operand:DI 1 "register_operand" "A")]
+         ANY_XSAVE))]
+  "!TARGET_64BIT && TARGET_XSAVE"
+  "<xsave>\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+
+;; 64-bit form: the input is split into two SImode operands with
+;; constraints "a" and "d".
+(define_insn "<xsave>_rex64"
+  [(set (match_operand:BLK 0 "memory_operand" "=m")
+        (unspec_volatile:BLK
+         [(match_operand:SI 1 "register_operand" "a")
+          (match_operand:SI 2 "register_operand" "d")]
+         ANY_XSAVE))]
+  "TARGET_64BIT && TARGET_XSAVE"
+  "<xsave>\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+
+;; This pattern iterates over ANY_XSAVE64, so <xsave> expands to the
+;; 64-suffixed names and does not clash with the pattern above.
+(define_insn "<xsave>"
+  [(set (match_operand:BLK 0 "memory_operand" "=m")
+        (unspec_volatile:BLK
+         [(match_operand:SI 1 "register_operand" "a")
+          (match_operand:SI 2 "register_operand" "d")]
+         ANY_XSAVE64))]
+  "TARGET_64BIT && TARGET_XSAVE"
+  "<xsave>\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
+
+;; xrstor patterns mirror the xsave operand layout, reading from memory.
+(define_insn "xrstor"
+  [(unspec_volatile:BLK
+     [(match_operand:BLK 0 "memory_operand" "m")
+      (match_operand:DI 1 "register_operand" "A")]
+     UNSPECV_XRSTOR)]
+  "!TARGET_64BIT && TARGET_XSAVE"
+  "xrstor\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "load")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+
+(define_insn "xrstor_rex64"
+  [(unspec_volatile:BLK
+     [(match_operand:BLK 0 "memory_operand" "m")
+      (match_operand:SI 1 "register_operand" "a")
+      (match_operand:SI 2 "register_operand" "d")]
+     UNSPECV_XRSTOR)]
+  "TARGET_64BIT && TARGET_XSAVE"
+  "xrstor\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "load")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 3"))])
+
+(define_insn "xrstor64"
+  [(unspec_volatile:BLK
+     [(match_operand:BLK 0 "memory_operand" "m")
+      (match_operand:SI 1 "register_operand" "a")
+      (match_operand:SI 2 "register_operand" "d")]
+     UNSPECV_XRSTOR64)]
+  "TARGET_64BIT && TARGET_XSAVE"
+  "xrstor64\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "load")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 4"))])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Floating-point instructions for atomic compound assignments
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Clobber all floating-point registers on environment save and restore
+; to ensure that the TOS value saved at fnstenv is valid after fldenv.
+(define_insn "fnstenv"
+  [(set (match_operand:BLK 0 "memory_operand" "=m")
+        (unspec_volatile:BLK [(const_int 0)] UNSPECV_FNSTENV))
+   (clobber (reg:HI FPCR_REG))
+   (clobber (reg:XF ST0_REG))
+   (clobber (reg:XF ST1_REG))
+   (clobber (reg:XF ST2_REG))
+   (clobber (reg:XF ST3_REG))
+   (clobber (reg:XF ST4_REG))
+   (clobber (reg:XF ST5_REG))
+   (clobber (reg:XF ST6_REG))
+   (clobber (reg:XF ST7_REG))]
+  "TARGET_80387"
+  "fnstenv\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
+
+;; fldenv additionally clobbers FPSR_REG.
+(define_insn "fldenv"
+  [(unspec_volatile [(match_operand:BLK 0 "memory_operand" "m")]
+                    UNSPECV_FLDENV)
+   (clobber (reg:CCFP FPSR_REG))
+   (clobber (reg:HI FPCR_REG))
+   (clobber (reg:XF ST0_REG))
+   (clobber (reg:XF ST1_REG))
+   (clobber (reg:XF ST2_REG))
+   (clobber (reg:XF ST3_REG))
+   (clobber (reg:XF ST4_REG))
+   (clobber (reg:XF ST5_REG))
+   (clobber (reg:XF ST6_REG))
+   (clobber (reg:XF ST7_REG))]
+  "TARGET_80387"
+  "fldenv\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "load")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
+
+;; Store form of fnstsw with an HImode memory destination.
+(define_insn "fnstsw"
+  [(set (match_operand:HI 0 "memory_operand" "=m")
+        (unspec_volatile:HI [(const_int 0)] UNSPECV_FNSTSW))]
+  "TARGET_80387"
+  "fnstsw\t%0"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 2"))])
+
+(define_insn "fnclex"
+  [(unspec_volatile [(const_int 0)] UNSPECV_FNCLEX)]
+  "TARGET_80387"
+  "fnclex"
+  [(set_attr "type" "other")
+   (set_attr "memory" "none")
+   (set_attr "length" "2")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; LWP instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Mode-less expander; the P-mode insn that follows has the same unspec.
+(define_expand "lwp_llwpcb"
+  [(unspec_volatile [(match_operand 0 "register_operand" "r")]
+                    UNSPECV_LLWP_INTRINSIC)]
+  "TARGET_LWP")
+
+(define_insn "*lwp_llwpcb<mode>1"
+  [(unspec_volatile [(match_operand:P 0 "register_operand" "r")]
+                    UNSPECV_LLWP_INTRINSIC)]
+  "TARGET_LWP"
+  "llwpcb\t%0"
+  [(set_attr "type" "lwp")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length" "5")])
+
+;; Expander that selects the DImode or SImode slwpcb insn based on Pmode.
+(define_expand "lwp_slwpcb"
+  [(set (match_operand 0 "register_operand" "=r")
+        (unspec_volatile [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
+  "TARGET_LWP"
+{
+  rtx (*insn)(rtx);
+
+  insn = (Pmode == DImode
+          ? gen_lwp_slwpcbdi
+          : gen_lwp_slwpcbsi);
+
+  emit_insn (insn (operands[0]));
+  DONE;
+})
+
+(define_insn "lwp_slwpcb<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+        (unspec_volatile:P [(const_int 0)] UNSPECV_SLWP_INTRINSIC))]
+  "TARGET_LWP"
+  "slwpcb\t%0"
+  [(set_attr "type" "lwp")
+   (set_attr "mode" "<MODE>")
+   (set_attr "length" "5")])
+
+;; The expander numbers its operands 1..3; operand 0 is unused, hence the
+;; dummy reference in the preparation statement.
+(define_expand "lwp_lwpval<mode>3"
+  [(unspec_volatile [(match_operand:SWI48 1 "register_operand" "r")
+                     (match_operand:SI 2 "nonimmediate_operand" "rm")
+                     (match_operand:SI 3 "const_int_operand" "i")]
+                    UNSPECV_LWPVAL_INTRINSIC)]
+  "TARGET_LWP"
+  ;; Avoid unused variable warning.
+  "(void) operands[0];")
+
+(define_insn "*lwp_lwpval<mode>3_1"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
+                     (match_operand:SI 1 "nonimmediate_operand" "rm")
+                     (match_operand:SI 2 "const_int_operand" "i")]
+                    UNSPECV_LWPVAL_INTRINSIC)]
+  "TARGET_LWP"
+  "lwpval\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "lwp")
+   (set_attr "mode" "<MODE>")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+
+;; lwpins sets the CCC flags; the expander also sets QImode operand 0 to
+;; the result of comparing those flags with zero (eq).
+(define_expand "lwp_lwpins<mode>3"
+  [(set (reg:CCC FLAGS_REG)
+        (unspec_volatile:CCC [(match_operand:SWI48 1 "register_operand" "r")
+                              (match_operand:SI 2 "nonimmediate_operand" "rm")
+                              (match_operand:SI 3 "const_int_operand" "i")]
+                             UNSPECV_LWPINS_INTRINSIC))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+        (eq:QI (reg:CCC FLAGS_REG) (const_int 0)))]
+  "TARGET_LWP")
+
+(define_insn "*lwp_lwpins<mode>3_1"
+  [(set (reg:CCC FLAGS_REG)
+        (unspec_volatile:CCC [(match_operand:SWI48 0 "register_operand" "r")
+                              (match_operand:SI 1 "nonimmediate_operand" "rm")
+                              (match_operand:SI 2 "const_int_operand" "i")]
+                             UNSPECV_LWPINS_INTRINSIC))]
+  "TARGET_LWP"
+  "lwpins\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "lwp")
+   (set_attr "mode" "<MODE>")
+   (set (attr "length")
+        (symbol_ref "ix86_attr_length_address_default (insn) + 9"))])
+
+;; Iterators over the read and write FS/GS base unspecs; the "fsgs"
+;; attribute supplies the "fs" or "gs" part of the insn name and mnemonic.
+(define_int_iterator RDFSGSBASE
+        [UNSPECV_RDFSBASE
+         UNSPECV_RDGSBASE])
+
+(define_int_iterator WRFSGSBASE
+        [UNSPECV_WRFSBASE
+         UNSPECV_WRGSBASE])
+
+(define_int_attr fsgs
+        [(UNSPECV_RDFSBASE "fs")
+         (UNSPECV_RDGSBASE "gs")
+         (UNSPECV_WRFSBASE "fs")
+         (UNSPECV_WRGSBASE "gs")])
+
+(define_insn "rd<fsgs>base<mode>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec_volatile:SWI48 [(const_int 0)] RDFSGSBASE))]
+  "TARGET_64BIT && TARGET_FSGSBASE"
+  "rd<fsgs>base\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
+(define_insn "wr<fsgs>base<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+                    WRFSGSBASE)]
+  "TARGET_64BIT && TARGET_FSGSBASE"
+  "wr<fsgs>base\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
+;; rdrand and rdseed each set a register result and the CCC flags.
+(define_insn "rdrand<mode>_1"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+        (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
+   (set (reg:CCC FLAGS_REG)
+        (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDRAND))]
+  "TARGET_RDRND"
+  "rdrand\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "1")])
+
+(define_insn "rdseed<mode>_1"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+        (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDSEED))
+   (set (reg:CCC FLAGS_REG)
+        (unspec_volatile:CCC [(const_int 0)] UNSPECV_RDSEED))]
+  "TARGET_RDSEED"
+  "rdseed\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "1")])
+
+;; The expander models pause as a set of a volatile BLKmode memory with a
+;; scratch address.
+(define_expand "pause"
+  [(set (match_dup 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
+  ""
+{
+  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+  MEM_VOLATILE_P (operands[0]) = 1;
+})
+
+;; Use "rep; nop", instead of "pause", to support older assemblers.
+;; They have the same encoding.
+(define_insn "*pause"
+  [(set (match_operand:BLK 0)
+        (unspec:BLK [(match_dup 0)] UNSPEC_PAUSE))]
+  ""
+  "rep%; nop"
+  [(set_attr "length" "2")
+   (set_attr "memory" "unknown")])
+
+;; Expander for xbegin: loads -1 into the AX hard register, emits the
+;; xbegin_1 jump insn targeting a label placed immediately after it, and
+;; copies AX to operand 0.
+(define_expand "xbegin"
+  [(set (match_operand:SI 0 "register_operand")
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_XBEGIN))]
+  "TARGET_RTM"
+{
+  rtx label = gen_label_rtx ();
+
+  /* xbegin is emitted as jump_insn, so reload won't be able
+     to reload its operand.  Force the value into AX hard register.  */
+  rtx ax_reg = gen_rtx_REG (SImode, AX_REG);
+  emit_move_insn (ax_reg, constm1_rtx);
+
+  emit_jump_insn (gen_xbegin_1 (ax_reg, label));
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  emit_move_insn (operands[0], ax_reg);
+
+  DONE;
+})
+
+;; Jump insn form of xbegin: a conditional jump to label operand 1, plus
+;; an update of operand 0, which is read and written (constraint "+a").
+(define_insn "xbegin_1"
+  [(set (pc)
+        (if_then_else (ne (unspec [(const_int 0)] UNSPEC_XBEGIN_ABORT)
+                          (const_int 0))
+                      (label_ref (match_operand 1))
+                      (pc)))
+   (set (match_operand:SI 0 "register_operand" "+a")
+        (unspec_volatile:SI [(match_dup 0)] UNSPECV_XBEGIN))]
+  "TARGET_RTM"
+  "xbegin\t%l1"
+  [(set_attr "type" "other")
+   (set_attr "length" "6")])
+
+(define_insn "xend"
+  [(unspec_volatile [(const_int 0)] UNSPECV_XEND)]
+  "TARGET_RTM"
+  "xend"
+  [(set_attr "type" "other")
+   (set_attr "length" "3")])
+
+;; The operand must satisfy const_0_to_255_operand.
+(define_insn "xabort"
+  [(unspec_volatile [(match_operand:SI 0 "const_0_to_255_operand" "n")]
+                    UNSPECV_XABORT)]
+  "TARGET_RTM"
+  "xabort\t%0"
+  [(set_attr "type" "other")
+   (set_attr "length" "3")])
+
+;; Expander for xtest: emits xtest_1, which sets the CCZ flags, then sets
+;; operand 0 from an NE test of those flags.
+(define_expand "xtest"
+  [(set (match_operand:QI 0 "register_operand")
+        (unspec_volatile:QI [(const_int 0)] UNSPECV_XTEST))]
+  "TARGET_RTM"
+{
+  emit_insn (gen_xtest_1 ());
+
+  ix86_expand_setcc (operands[0], NE,
+                     gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
+  DONE;
+})
+
+(define_insn "xtest_1"
+  [(set (reg:CCZ FLAGS_REG)
+        (unspec_volatile:CCZ [(const_int 0)] UNSPECV_XTEST))]
+  "TARGET_RTM"
+  "xtest"
+  [(set_attr "type" "other")
+   (set_attr "length" "3")])
+
+(include "mmx.md")
+(include "sse.md")
+(include "sync.md") |