From b094d6c4bf572654a031ecc4afe675154c886dc5 Mon Sep 17 00:00:00 2001 From: Jing Yu Date: Thu, 22 Jul 2010 14:03:48 -0700 Subject: commit gcc-4.4.3, which is used to build the gcc-4.4.3 Android toolchain in master. The source is based on FSF gcc-4.4.3 and contains local patches, which are recorded in gcc-4.4.3/README.google. Change-Id: Id8c6d6927df274ae9749196a1cc24dbd9abc9887 --- gcc-4.4.3/gcc/config/i386/i386.md | 21977 ++++++++++++++++++++++++++++++++++++ 1 file changed, 21977 insertions(+) create mode 100644 gcc-4.4.3/gcc/config/i386/i386.md diff --git a/gcc-4.4.3/gcc/config/i386/i386.md b/gcc-4.4.3/gcc/config/i386/i386.md new file mode 100644 index 000000000..22c891c49 --- /dev/null +++ b/gcc-4.4.3/gcc/config/i386/i386.md @@ -0,0 +1,21977 @@ +;; GCC machine description for IA-32 and x86-64. +;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 +;; Free Software Foundation, Inc. +;; Mostly by William Schelter. +;; x86_64 support added by Jan Hubicka +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. */ +;; +;; The original PO technology requires these to be ordered by speed, +;; so that the assigner will pick the fastest. +;; +;; See file "rtl.def" for documentation on define_insn, match_*, et al. +;; +;; The special asm out single letter directives following a '%' are: +;; 'z' mov%z1 would be movl, movw, or movb depending on the mode of +;; operands[1]. +;; 'L' Print the opcode suffix for a 32-bit integer opcode. +;; 'W' Print the opcode suffix for a 16-bit integer opcode. +;; 'B' Print the opcode suffix for an 8-bit integer opcode. +;; 'Q' Print the opcode suffix for a 64-bit float opcode. +;; 'S' Print the opcode suffix for a 32-bit float opcode. +;; 'T' Print the opcode suffix for an 80-bit extended real XFmode float opcode. +;; 'J' Print the appropriate jump operand. +;; +;; 'b' Print the QImode name of the register for the indicated operand. +;; %b0 would print %al if operands[0] is reg 0. +;; 'w' Likewise, print the HImode name of the register. +;; 'k' Likewise, print the SImode name of the register. +;; 'h' Print the QImode name for a "high" register, either ah, bh, ch or dh. +;; 'y' Print "st(0)" instead of "st" as a register.
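+;; +;; A worked illustration (editor's sketch, not part of the FSF sources): with +;; operands[0] in hard register 0 and operands[1] an SImode immediate, the +;; template "mov%z1\t{%1, %k0|%k0, %1}" prints %z1 as the SImode suffix "l" +;; and %k0 as "%eax", so the AT&T output is "movl\t$5, %eax"; likewise %b0 +;; would print "%al", %w0 "%ax" and %h0 "%ah".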
+ +;; UNSPEC usage: + +(define_constants + [; Relocation specifiers + (UNSPEC_GOT 0) + (UNSPEC_GOTOFF 1) + (UNSPEC_GOTPCREL 2) + (UNSPEC_GOTTPOFF 3) + (UNSPEC_TPOFF 4) + (UNSPEC_NTPOFF 5) + (UNSPEC_DTPOFF 6) + (UNSPEC_GOTNTPOFF 7) + (UNSPEC_INDNTPOFF 8) + (UNSPEC_PLTOFF 9) + (UNSPEC_MACHOPIC_OFFSET 10) + + ; Prologue support + (UNSPEC_STACK_ALLOC 11) + (UNSPEC_SET_GOT 12) + (UNSPEC_SSE_PROLOGUE_SAVE 13) + (UNSPEC_REG_SAVE 14) + (UNSPEC_DEF_CFA 15) + (UNSPEC_SET_RIP 16) + (UNSPEC_SET_GOT_OFFSET 17) + (UNSPEC_MEMORY_BLOCKAGE 18) + + ; TLS support + (UNSPEC_TP 20) + (UNSPEC_TLS_GD 21) + (UNSPEC_TLS_LD_BASE 22) + (UNSPEC_TLSDESC 23) + + ; Other random patterns + (UNSPEC_SCAS 30) + (UNSPEC_FNSTSW 31) + (UNSPEC_SAHF 32) + (UNSPEC_FSTCW 33) + (UNSPEC_ADD_CARRY 34) + (UNSPEC_FLDCW 35) + (UNSPEC_REP 36) + (UNSPEC_LD_MPIC 38) ; load_macho_picbase + (UNSPEC_TRUNC_NOOP 39) + + ; For SSE/MMX support: + (UNSPEC_FIX_NOTRUNC 40) + (UNSPEC_MASKMOV 41) + (UNSPEC_MOVMSK 42) + (UNSPEC_MOVNT 43) + (UNSPEC_MOVU 44) + (UNSPEC_RCP 45) + (UNSPEC_RSQRT 46) + (UNSPEC_SFENCE 47) + (UNSPEC_PFRCP 49) + (UNSPEC_PFRCPIT1 40) + (UNSPEC_PFRCPIT2 41) + (UNSPEC_PFRSQRT 42) + (UNSPEC_PFRSQIT1 43) + (UNSPEC_MFENCE 44) + (UNSPEC_LFENCE 45) + (UNSPEC_PSADBW 46) + (UNSPEC_LDDQU 47) + (UNSPEC_MS_TO_SYSV_CALL 48) + + ; Generic math support + (UNSPEC_COPYSIGN 50) + (UNSPEC_IEEE_MIN 51) ; not commutative + (UNSPEC_IEEE_MAX 52) ; not commutative + + ; x87 Floating point + (UNSPEC_SIN 60) + (UNSPEC_COS 61) + (UNSPEC_FPATAN 62) + (UNSPEC_FYL2X 63) + (UNSPEC_FYL2XP1 64) + (UNSPEC_FRNDINT 65) + (UNSPEC_FIST 66) + (UNSPEC_F2XM1 67) + (UNSPEC_TAN 68) + (UNSPEC_FXAM 69) + + ; x87 Rounding + (UNSPEC_FRNDINT_FLOOR 70) + (UNSPEC_FRNDINT_CEIL 71) + (UNSPEC_FRNDINT_TRUNC 72) + (UNSPEC_FRNDINT_MASK_PM 73) + (UNSPEC_FIST_FLOOR 74) + (UNSPEC_FIST_CEIL 75) + + ; x87 Double output FP + (UNSPEC_SINCOS_COS 80) + (UNSPEC_SINCOS_SIN 81) + (UNSPEC_XTRACT_FRACT 84) + (UNSPEC_XTRACT_EXP 85) + (UNSPEC_FSCALE_FRACT 86) + (UNSPEC_FSCALE_EXP 87) + (UNSPEC_FPREM_F 88) + (UNSPEC_FPREM_U 89) + (UNSPEC_FPREM1_F 90) + (UNSPEC_FPREM1_U 91) + + (UNSPEC_C2_FLAG 95) + (UNSPEC_FXAM_MEM 96) + + ; SSP patterns + (UNSPEC_SP_SET 100) + (UNSPEC_SP_TEST 101) + (UNSPEC_SP_TLS_SET 102) + (UNSPEC_SP_TLS_TEST 103) + + ; SSSE3 + (UNSPEC_PSHUFB 120) + (UNSPEC_PSIGN 121) + (UNSPEC_PALIGNR 122) + + ; For SSE4A support + (UNSPEC_EXTRQI 130) + (UNSPEC_EXTRQ 131) + (UNSPEC_INSERTQI 132) + (UNSPEC_INSERTQ 133) + + ; For SSE4.1 support + (UNSPEC_BLENDV 134) + (UNSPEC_INSERTPS 135) + (UNSPEC_DP 136) + (UNSPEC_MOVNTDQA 137) + (UNSPEC_MPSADBW 138) + (UNSPEC_PHMINPOSUW 139) + (UNSPEC_PTEST 140) + (UNSPEC_ROUND 141) + + ; For SSE4.2 support + (UNSPEC_CRC32 143) + (UNSPEC_PCMPESTR 144) + (UNSPEC_PCMPISTR 145) + + ;; For SSE5 + (UNSPEC_SSE5_INTRINSIC 150) + (UNSPEC_SSE5_UNSIGNED_CMP 151) + (UNSPEC_SSE5_TRUEFALSE 152) + (UNSPEC_SSE5_PERMUTE 153) + (UNSPEC_FRCZ 154) + (UNSPEC_CVTPH2PS 155) + (UNSPEC_CVTPS2PH 156) + + ; For AES support + (UNSPEC_AESENC 159) + (UNSPEC_AESENCLAST 160) + (UNSPEC_AESDEC 161) + (UNSPEC_AESDECLAST 162) + (UNSPEC_AESIMC 163) + (UNSPEC_AESKEYGENASSIST 164) + + ; For PCLMUL support + (UNSPEC_PCLMUL 165) + + ; For AVX support + (UNSPEC_PCMP 166) + (UNSPEC_VPERMIL 167) + (UNSPEC_VPERMIL2F128 168) + (UNSPEC_MASKLOAD 169) + (UNSPEC_MASKSTORE 170) + (UNSPEC_CAST 171) + (UNSPEC_VTESTP 172) + ]) + +(define_constants + [(UNSPECV_BLOCKAGE 0) + (UNSPECV_STACK_PROBE 1) + (UNSPECV_EMMS 2) + (UNSPECV_LDMXCSR 3) + (UNSPECV_STMXCSR 4) + (UNSPECV_FEMMS 5) + (UNSPECV_CLFLUSH 6) + 
(UNSPECV_ALIGN 7) + (UNSPECV_MONITOR 8) + (UNSPECV_MWAIT 9) + (UNSPECV_CMPXCHG 10) + (UNSPECV_XCHG 12) + (UNSPECV_LOCK 13) + (UNSPECV_PROLOGUE_USE 14) + (UNSPECV_CLD 15) + (UNSPECV_VZEROALL 16) + (UNSPECV_VZEROUPPER 17) + ]) + +;; Constants to represent pcomtrue/pcomfalse variants +(define_constants + [(PCOM_FALSE 0) + (PCOM_TRUE 1) + (COM_FALSE_S 2) + (COM_FALSE_P 3) + (COM_TRUE_S 4) + (COM_TRUE_P 5) + ]) + +;; Constants used in the SSE5 pperm instruction +(define_constants + [(PPERM_SRC 0x00) /* copy source */ + (PPERM_INVERT 0x20) /* invert source */ + (PPERM_REVERSE 0x40) /* bit reverse source */ + (PPERM_REV_INV 0x60) /* bit reverse & invert src */ + (PPERM_ZERO 0x80) /* all 0's */ + (PPERM_ONES 0xa0) /* all 1's */ + (PPERM_SIGN 0xc0) /* propagate sign bit */ + (PPERM_INV_SIGN 0xe0) /* invert & propagate sign */ + (PPERM_SRC1 0x00) /* use first source byte */ + (PPERM_SRC2 0x10) /* use second source byte */ + ]) + +;; Registers by name. +(define_constants + [(AX_REG 0) + (DX_REG 1) + (CX_REG 2) + (BX_REG 3) + (SI_REG 4) + (DI_REG 5) + (BP_REG 6) + (SP_REG 7) + (ST0_REG 8) + (ST1_REG 9) + (ST2_REG 10) + (ST3_REG 11) + (ST4_REG 12) + (ST5_REG 13) + (ST6_REG 14) + (ST7_REG 15) + (FLAGS_REG 17) + (FPSR_REG 18) + (FPCR_REG 19) + (XMM0_REG 21) + (XMM1_REG 22) + (XMM2_REG 23) + (XMM3_REG 24) + (XMM4_REG 25) + (XMM5_REG 26) + (XMM6_REG 27) + (XMM7_REG 28) + (MM0_REG 29) + (MM1_REG 30) + (MM2_REG 31) + (MM3_REG 32) + (MM4_REG 33) + (MM5_REG 34) + (MM6_REG 35) + (MM7_REG 36) + (R8_REG 37) + (R9_REG 38) + (R10_REG 39) + (R11_REG 40) + (R13_REG 42) + (XMM8_REG 45) + (XMM9_REG 46) + (XMM10_REG 47) + (XMM11_REG 48) + (XMM12_REG 49) + (XMM13_REG 50) + (XMM14_REG 51) + (XMM15_REG 52) + ]) + +;; Insns whose names begin with "x86_" are emitted by gen_FOO calls +;; from i386.c. + +;; In C guard expressions, put expressions which may be compile-time +;; constants first. This allows for better optimization. For +;; example, write "TARGET_64BIT && reload_completed", not +;; "reload_completed && TARGET_64BIT". + + +;; Processor type. +(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2, + generic64,amdfam10" + (const (symbol_ref "ix86_schedule"))) + +;; A basic instruction type. Refinements due to arguments to be +;; provided in other attributes. +(define_attr "type" + "other,multi, + alu,alu1,negnot,imov,imovx,lea, + incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + icmp,test,ibr,setcc,icmov, + push,pop,call,callv,leave, + str,bitmanip, + fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint, + sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins, + ssemuladd,sse4arg, + mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" + (const_string "other")) + +;; Main data type used by the insn +(define_attr "mode" + "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF" + (const_string "unknown")) + +;; The CPU unit an operation uses.
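+;; For example (editor's note, not in the FSF sources), the classification +;; below maps a "fmul" insn to unit "i387", an "sseadd" insn to unit "sse", +;; an "mmxmov" insn to unit "mmx", and lets plain "alu" insns fall through +;; to the default, "integer".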
+(define_attr "unit" "integer,i387,sse,mmx,unknown" + (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint") + (const_string "i387") + (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul, + sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt, + ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg") + (const_string "sse") + (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") + (const_string "mmx") + (eq_attr "type" "other") + (const_string "unknown")] + (const_string "integer"))) + +;; The (bounding maximum) length of an instruction immediate. +(define_attr "length_immediate" "" + (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, + bitmanip") + (const_int 0) + (eq_attr "unit" "i387,sse,mmx") + (const_int 0) + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, + imul,icmp,push,pop") + (symbol_ref "ix86_attr_length_immediate_default(insn,1)") + (eq_attr "type" "imov,test") + (symbol_ref "ix86_attr_length_immediate_default(insn,0)") + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_int 4) + (const_int 0)) + ;; We don't know the size before shorten_branches. Expect + ;; the instruction to fit for better scheduling. + (eq_attr "type" "ibr") + (const_int 1) + ] + (symbol_ref "/* Update immediate_length and other attributes! */ + gcc_unreachable (),1"))) + +;; The (bounding maximum) length of an instruction address. +(define_attr "length_address" "" + (cond [(eq_attr "type" "str,other,multi,fxch") + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + ] + (symbol_ref "ix86_attr_length_address_default (insn)"))) + +;; Set when length prefix is used. +(define_attr "prefix_data16" "" + (if_then_else (ior (eq_attr "mode" "HI") + (and (eq_attr "unit" "sse") (eq_attr "mode" "V2DF"))) + (const_int 1) + (const_int 0))) + +;; Set when string REP prefix is used. +(define_attr "prefix_rep" "" + (if_then_else (and (eq_attr "unit" "sse") (eq_attr "mode" "SF,DF")) + (const_int 1) + (const_int 0))) + +;; Set when 0f opcode prefix is used. +(define_attr "prefix_0f" "" + (if_then_else + (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip") + (eq_attr "unit" "sse,mmx")) + (const_int 1) + (const_int 0))) + +;; Set when REX opcode prefix is used. +(define_attr "prefix_rex" "" + (cond [(and (eq_attr "mode" "DI") + (eq_attr "type" "!push,pop,call,callv,leave,ibr")) + (const_int 1) + (and (eq_attr "mode" "QI") + (ne (symbol_ref "x86_extended_QIreg_mentioned_p (insn)") + (const_int 0))) + (const_int 1) + (ne (symbol_ref "x86_extended_reg_mentioned_p (insn)") + (const_int 0)) + (const_int 1) + ] + (const_int 0))) + +;; There are also additional prefixes in SSSE3. +(define_attr "prefix_extra" "" (const_int 0)) + +;; Prefix used: original, VEX or maybe VEX. +(define_attr "prefix" "orig,vex,maybe_vex" + (if_then_else (eq_attr "mode" "OI,V8SF,V4DF") + (const_string "vex") + (const_string "orig"))) + +;; There is a 8bit immediate for VEX. +(define_attr "prefix_vex_imm8" "" (const_int 0)) + +;; VEX W bit is used. 
+(define_attr "prefix_vex_w" "" (const_int 0)) + +;; The length of VEX prefix +(define_attr "length_vex" "" + (if_then_else (eq_attr "prefix_0f" "1") + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)") + (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)")) + (if_then_else (eq_attr "prefix_vex_w" "1") + (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)") + (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)")))) + +;; Set when modrm byte is used. +(define_attr "modrm" "" + (cond [(eq_attr "type" "str,leave") + (const_int 0) + (eq_attr "unit" "i387") + (const_int 0) + (and (eq_attr "type" "incdec") + (ior (match_operand:SI 1 "register_operand" "") + (match_operand:HI 1 "register_operand" ""))) + (const_int 0) + (and (eq_attr "type" "push") + (not (match_operand 1 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "pop") + (not (match_operand 0 "memory_operand" ""))) + (const_int 0) + (and (eq_attr "type" "imov") + (ior (and (match_operand 0 "register_operand" "") + (match_operand 1 "immediate_operand" "")) + (ior (and (match_operand 0 "ax_reg_operand" "") + (match_operand 1 "memory_displacement_only_operand" "")) + (and (match_operand 0 "memory_displacement_only_operand" "") + (match_operand 1 "ax_reg_operand" ""))))) + (const_int 0) + (and (eq_attr "type" "call") + (match_operand 0 "constant_call_address_operand" "")) + (const_int 0) + (and (eq_attr "type" "callv") + (match_operand 1 "constant_call_address_operand" "")) + (const_int 0) + ] + (const_int 1))) + +;; The (bounding maximum) length of an instruction in bytes. +;; ??? fistp and frndint are in fact fldcw/{fistp,frndint}/fldcw sequences. +;; Later we may want to split them and compute proper length as for +;; other insns. +(define_attr "length" "" + (cond [(eq_attr "type" "other,multi,fistp,frndint") + (const_int 16) + (eq_attr "type" "fcmp") + (const_int 4) + (eq_attr "unit" "i387") + (plus (const_int 2) + (plus (attr "prefix_data16") + (attr "length_address"))) + (ior (eq_attr "prefix" "vex") + (and (eq_attr "prefix" "maybe_vex") + (ne (symbol_ref "TARGET_AVX") (const_int 0)))) + (plus (attr "length_vex") + (plus (attr "prefix_vex_imm8") + (plus (attr "modrm") + (attr "length_address"))))] + (plus (plus (attr "modrm") + (plus (attr "prefix_0f") + (plus (attr "prefix_rex") + (plus (attr "prefix_extra") + (const_int 1))))) + (plus (attr "prefix_rep") + (plus (attr "prefix_data16") + (plus (attr "length_immediate") + (attr "length_address"))))))) + +;; The `memory' attribute is `none' if no memory is referenced, `load' or +;; `store' if there is a simple memory reference therein, or `unknown' +;; if the instruction is complex. 
+ +(define_attr "memory" "none,load,store,both,unknown" + (cond [(eq_attr "type" "other,multi,str") + (const_string "unknown") + (eq_attr "type" "lea,fcmov,fpspc") + (const_string "none") + (eq_attr "type" "fistp,leave") + (const_string "both") + (eq_attr "type" "frndint") + (const_string "load") + (eq_attr "type" "push") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "both") + (const_string "store")) + (eq_attr "type" "pop") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "both") + (const_string "load")) + (eq_attr "type" "setcc") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "store") + (const_string "none")) + (eq_attr "type" "icmp,test,ssecmp,ssecomi,mmxcmp,fcmp") + (if_then_else (ior (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "load") + (const_string "none")) + (eq_attr "type" "ibr") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "load") + (const_string "none")) + (eq_attr "type" "call") + (if_then_else (match_operand 0 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (eq_attr "type" "callv") + (if_then_else (match_operand 1 "constant_call_address_operand" "") + (const_string "none") + (const_string "load")) + (and (eq_attr "type" "alu1,negnot,ishift1,sselog1") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (and (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "both") + (match_operand 0 "memory_operand" "") + (const_string "store") + (match_operand 1 "memory_operand" "") + (const_string "load") + (and (eq_attr "type" + "!alu1,negnot,ishift1, + imov,imovx,icmp,test,bitmanip, + fmov,fcmp,fsgn, + sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1, + sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt") + (match_operand 2 "memory_operand" "")) + (const_string "load") + (and (eq_attr "type" "icmov,ssemuladd,sse4arg") + (match_operand 3 "memory_operand" "")) + (const_string "load") + ] + (const_string "none"))) + +;; Indicates if an instruction has both an immediate and a displacement. + +(define_attr "imm_disp" "false,true,unknown" + (cond [(eq_attr "type" "other,multi") + (const_string "unknown") + (and (eq_attr "type" "icmp,test,imov,alu1,ishift1,rotate1") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 1 "immediate_operand" ""))) + (const_string "true") + (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") + (and (match_operand 0 "memory_displacement_operand" "") + (match_operand 2 "immediate_operand" ""))) + (const_string "true") + ] + (const_string "false"))) + +;; Indicates if an FP operation has an integer source. + +(define_attr "fp_int_src" "false,true" + (const_string "false")) + +;; Defines rounding mode of an FP operation. + +(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" + (const_string "any")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "length" "128") + (set_attr "type" "multi")]) + +;; All integer comparison codes. +(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu ]) + +;; All floating-point comparison codes. 
+(define_code_iterator fp_cond [unordered ordered + uneq unge ungt unle unlt ltgt ]) + +(define_code_iterator plusminus [plus minus]) + +(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus]) + +;; Base name for define_insn +(define_code_attr plusminus_insn + [(plus "add") (ss_plus "ssadd") (us_plus "usadd") + (minus "sub") (ss_minus "sssub") (us_minus "ussub")]) + +;; Base name for insn mnemonic. +(define_code_attr plusminus_mnemonic + [(plus "add") (ss_plus "adds") (us_plus "addus") + (minus "sub") (ss_minus "subs") (us_minus "subus")]) + +;; Mark commutative operators as such in constraints. +(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%") + (minus "") (ss_minus "") (us_minus "")]) + +;; Mapping of signed max and min +(define_code_iterator smaxmin [smax smin]) + +;; Mapping of unsigned max and min +(define_code_iterator umaxmin [umax umin]) + +;; Mapping of signed/unsigned max and min +(define_code_iterator maxmin [smax smin umax umin]) + +;; Base name for integer and FP insn mnemonic +(define_code_attr maxminiprefix [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) +(define_code_attr maxminfprefix [(smax "max") (smin "min")]) + +;; Mapping of parallel logic operators +(define_code_iterator plogic [and ior xor]) + +;; Base name for insn mnemonic. +(define_code_attr plogicprefix [(and "and") (ior "or") (xor "xor")]) + +;; Mapping of abs neg operators +(define_code_iterator absneg [abs neg]) + +;; Base name for x87 insn mnemonic. +(define_code_attr absnegprefix [(abs "abs") (neg "chs")]) + +;; All single word integer modes. +(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")]) + +;; Single word integer modes without QImode. +(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")]) + +;; Instruction suffix for integer modes. +(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")]) + +;; Register class for integer modes. +(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")]) + +;; Immediate operand constraint for integer modes. +(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")]) + +;; General operand predicate for integer modes. +(define_mode_attr general_operand + [(QI "general_operand") + (HI "general_operand") + (SI "general_operand") + (DI "x86_64_general_operand")]) + +;; SSE and x87 SFmode and DFmode floating point modes +(define_mode_iterator MODEF [SF DF]) + +;; All x87 floating point modes +(define_mode_iterator X87MODEF [SF DF XF]) + +;; All integer modes handled by x87 fisttp operator. +(define_mode_iterator X87MODEI [HI SI DI]) + +;; All integer modes handled by integer x87 operators. +(define_mode_iterator X87MODEI12 [HI SI]) + +;; All integer modes handled by SSE cvtts?2si* operators. +(define_mode_iterator SSEMODEI24 [SI DI]) + +;; SSE asm suffix for floating point modes +(define_mode_attr ssemodefsuffix [(SF "s") (DF "d")]) + +;; SSE vector mode corresponding to a scalar mode +(define_mode_attr ssevecmode + [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")]) + +;; Instruction suffix for REX 64bit operators. +(define_mode_attr rex64suffix [(SI "") (DI "{q}")]) + +;; This mode iterator allows :P to be used for patterns that operate on +;; pointer-sized quantities. Exactly one of the two alternatives will match. 
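+;; (Editor's note, not in the FSF sources: the iterator below emits one +;; SImode variant of each :P pattern guarded by "Pmode == SImode" and one +;; DImode variant guarded by "Pmode == DImode", so a single template can +;; serve both 32-bit and 64-bit pointer sizes.)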
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")]) + + +;; Scheduling descriptions + +(include "pentium.md") +(include "ppro.md") +(include "k6.md") +(include "athlon.md") +(include "geode.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; Compare instructions. + +;; All compare insns have expanders that save the operands away without +;; actually generating RTL. The bCOND or sCOND (emitted immediately +;; after the cmp) will actually emit the cmpM. + +(define_expand "cmpti" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "x86_64_general_operand" "")))] + "TARGET_64BIT" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[0] = force_reg (TImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpdi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "x86_64_general_operand" "")))] + "" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[0] = force_reg (DImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpsi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SI 0 "cmpsi_operand" "") + (match_operand:SI 1 "general_operand" "")))] + "" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[0] = force_reg (SImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmphi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" "")))] + "" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[0] = force_reg (HImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmpqi" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" "")))] + "TARGET_QIMODE_MATH" +{ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[0] = force_reg (QImode, operands[0]); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_insn "cmpdi_ccno_1_rex64" + [(set (reg FLAGS_REG) + (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr") + (match_operand:DI 1 "const0_operand" "")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "@ + test{q}\t%0, %0 + cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "DI")]) + +(define_insn "*cmpdi_minus_1_rex64" + [(set (reg FLAGS_REG) + (compare (minus:DI (match_operand:DI 0 "nonimmediate_operand" "rm,r") + (match_operand:DI 1 "x86_64_general_operand" "re,mr")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)" + "cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "DI")]) + +(define_expand "cmpdi_1_rex64" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" "")))] + "TARGET_64BIT" + "") + +(define_insn "cmpdi_1_insn_rex64" + [(set (reg FLAGS_REG) + (compare (match_operand:DI 0 "nonimmediate_operand" "mr,r") + (match_operand:DI 1 "x86_64_general_operand" "re,mr")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" 
"DI")]) + + +(define_insn "*cmpsi_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr") + (match_operand:SI 1 "const0_operand" "")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{l}\t%0, %0 + cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "SI")]) + +(define_insn "*cmpsi_minus_1" + [(set (reg FLAGS_REG) + (compare (minus:SI (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "SI")]) + +(define_expand "cmpsi_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" "")))] + "" + "") + +(define_insn "*cmpsi_1_insn" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 0 "nonimmediate_operand" "rm,r") + (match_operand:SI 1 "general_operand" "ri,mr")))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, CCmode)" + "cmp{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "SI")]) + +(define_insn "*cmphi_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr") + (match_operand:HI 1 "const0_operand" "")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{w}\t%0, %0 + cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "HI")]) + +(define_insn "*cmphi_minus_1" + [(set (reg FLAGS_REG) + (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r") + (match_operand:HI 1 "general_operand" "rn,mr")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "HI")]) + +(define_insn "*cmphi_1" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r") + (match_operand:HI 1 "general_operand" "rn,mr")))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, CCmode)" + "cmp{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "HI")]) + +(define_insn "*cmpqi_ccno_1" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq") + (match_operand:QI 1 "const0_operand" "")))] + "ix86_match_ccmode (insn, CCNOmode)" + "@ + test{b}\t%0, %0 + cmp{b}\t{$0, %0|%0, 0}" + [(set_attr "type" "test,icmp") + (set_attr "length_immediate" "0,1") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_1" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q") + (match_operand:QI 1 "general_operand" "qn,mq")))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_minus_1" + [(set (reg FLAGS_REG) + (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q") + (match_operand:QI 1 "general_operand" "qn,mq")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCGOCmode)" + "cmp{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "general_operand" "Qm") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" 
"icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "register_operand" "Q") + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "const0_operand" "")))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t%h0, %h0" + [(set_attr "type" "test") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_expand "cmpqi_ext_3" + [(set (reg:CC FLAGS_REG) + (compare:CC + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "")))] + "" + "") + +(define_insn "cmpqi_ext_3_insn" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "general_operand" "Qmn")))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "cmpqi_ext_3_insn_rex64" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "nonmemory_operand" "Qn")))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +(define_insn "*cmpqi_ext_4" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (subreg:QI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + +;; These implement float point compares. +;; %%% See if we can get away with VOIDmode operands on the actual insns, +;; which would allow mix and match FP modes on the compares. Which is what +;; the old patterns did, but with many more of them. + +(define_expand "cmpxf" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:XF 0 "nonmemory_operand" "") + (match_operand:XF 1 "nonmemory_operand" "")))] + "TARGET_80387" +{ + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +(define_expand "cmp" + [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "") + (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + DONE; +}) + +;; FP compares, step 1: +;; Set the FP condition codes. +;; +;; CCFPmode compare with exceptions +;; CCFPUmode compare with no exceptions + +;; We may not use "#" to split and emit these, since the REG_DEAD notes +;; used to manage the reg stack popping would not be preserved. 
+ +(define_insn "*cmpfp_0" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" ""))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn_and_split "*cmpfp_0_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" ""))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn "*cmpfp_xf" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn_and_split "*cmpfp_xf_cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:XF 1 "register_operand" "f") + (match_operand:XF 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "XF")]) + +(define_insn "*cmpfp_" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))] + UNSPEC_FNSTSW))] + "TARGET_80387" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand:MODEF 1 "register_operand" "f") + (match_operand:MODEF 2 "nonimmediate_operand" "fm"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "TARGET_80387 + && TARGET_SAHF && !TARGET_CMOVE" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_insn "*cmpfp_u" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) 
+ && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "* return output_fp_compare (insn, operands, 0, 1);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn_and_split "*cmpfp_u_cc" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU + (match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f"))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2])" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFPU (match_dup 1)(match_dup 2))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF")))]) + +(define_insn "*cmpfp_" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))] + UNSPEC_FNSTSW))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "* return output_fp_compare (insn, operands, 0, 0);" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn_and_split "*cmpfp__cc" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (match_operand 1 "register_operand" "f") + (match_operator 3 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "m")]))) + (clobber (match_operand:HI 0 "register_operand" "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_SAHF && !TARGET_CMOVE + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && (GET_MODE (operands [3]) == GET_MODE (operands[1]))" + "#" + "&& reload_completed" + [(set (match_dup 0) + (unspec:HI + [(compare:CCFP + (match_dup 1) + (match_op_dup 3 [(match_dup 2)]))] + UNSPEC_FNSTSW)) + (set (reg:CC FLAGS_REG) + (unspec:CC [(match_dup 0)] UNSPEC_SAHF))] + "" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +;; FP compares, step 2 +;; Move the fpsw to ax. + +(define_insn "x86_fnstsw_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI [(reg:CCFP FPSR_REG)] UNSPEC_FNSTSW))] + "TARGET_80387" + "fnstsw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "SI") + (set_attr "unit" "i387")]) + +;; FP compares, step 3 +;; Get ax into flags, general case. + +(define_insn "x86_sahf_1" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:HI 0 "register_operand" "a")] + UNSPEC_SAHF))] + "TARGET_SAHF" +{ +#ifdef HAVE_AS_IX86_SAHF + return "sahf"; +#else + return ".byte\t0x9e"; +#endif +} + [(set_attr "length" "1") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct") + (set_attr "mode" "SI")]) + +;; Pentium Pro can do steps 1 through 3 in one go. 
+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes) +(define_insn "*cmpfp_i_mixed" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*cmpfp_i_sse" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "x") + (match_operand 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*cmpfp_i_i387" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 0);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*cmpfp_iu_mixed" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f,x") + (match_operand 1 "nonimmediate_operand" "f,xm")))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp,ssecomi") + (set_attr "prefix" "orig,maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*cmpfp_iu_sse" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "x") + (match_operand 1 "nonimmediate_operand" "xm")))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[0])) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "ssecomi") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (if_then_else (match_operand:SF 1 "" "") + (const_string "SF") + (const_string "DF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*cmpfp_iu_387" + [(set (reg:CCFPU FLAGS_REG) + (compare:CCFPU (match_operand 0 "register_operand" "f") + (match_operand 1 "register_operand" "f")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[0])) + && TARGET_CMOVE + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH) + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "* 
return output_fp_compare (insn, operands, 1, 1);" + [(set_attr "type" "fcmp") + (set (attr "mode") + (cond [(match_operand:SF 1 "" "") + (const_string "SF") + (match_operand:DF 1 "" "") + (const_string "DF") + ] + (const_string "XF"))) + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "direct")]) + +;; Move instructions. + +;; General case of fullword move. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "general_operand" ""))] + "" + "ix86_expand_move (SImode, operands); DONE;") + +;; Push/pop instructions. They are separate since autoinc/dec is not a +;; general_operand. +;; +;; %%% We don't use a post-inc memory reference because x86 is not a +;; general AUTO_INC_DEC host, which impacts how it is treated in flow. +;; Changing this impacts compiler performance on other non-AUTO_INC_DEC +;; targets without our curiosities, and it is just as easy to represent +;; this differently. + +(define_insn "*pushsi2" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m"))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes. +(define_insn "*pushsi2_rex64" + [(set (match_operand:SI 0 "push_operand" "=X") + (match_operand:SI 1 "nonmemory_no_elim_operand" "ri"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*pushsi2_prologue" + [(set (match_operand:SI 0 "push_operand" "=<") + (match_operand:SI 1 "general_no_elim_operand" "ri*m")) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "push{l}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +(define_insn "*popsi1_epilogue" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "pop{l}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "SI")]) + +(define_insn "popsi1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r*m") + (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) (const_int 4)))] + "!TARGET_64BIT" + "pop{l}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "SI")]) + +(define_insn "*movsi_xor" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{l}\t%0, %0" + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsi_or" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "immediate_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && operands[1] == constm1_rtx" +{ + operands[1] = constm1_rtx; + return "or{l}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "1")]) + +(define_insn "*movsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" + "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x") + (match_operand:SI 1 "general_operand" + "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r ,m "))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSELOG1: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + return "%vxorps\t%0, %d0"; + + case TYPE_SSEMOV: + switch (get_attr_mode (insn)) + { + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; 
+ case MODE_SI: + return "%vmovd\t{%1, %0|%0, %1}"; + case MODE_SF: + return "%vmovss\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + case TYPE_MMX: + return "pxor\t%0, %0"; + + case TYPE_MMXMOV: + if (get_attr_mode (insn) == MODE_DI) + return "movq\t{%1, %0|%0, %1}"; + return "movd\t{%1, %0|%0, %1}"; + + case TYPE_LEA: + return "lea{l}\t{%1, %0|%0, %1}"; + + default: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + return "mov{l}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "mmx") + (eq_attr "alternative" "3,4,5") + (const_string "mmxmov") + (eq_attr "alternative" "6") + (const_string "sselog1") + (eq_attr "alternative" "7,8,9,10,11") + (const_string "ssemov") + (match_operand:DI 1 "pic_32bit_operand" "") + (const_string "lea") + ] + (const_string "imov"))) + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4,5") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (const_string "DI") + (eq_attr "alternative" "6,7") + (if_then_else + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (and (eq_attr "alternative" "8,9,10,11") + (eq (symbol_ref "TARGET_SSE2") (const_int 0))) + (const_string "SF") + ] + (const_string "SI")))]) + +;; Stores and loads of ax to arbitrary constant address. +;; We fake a second form of the instruction to force reload to load the +;; address into a register when rax is not available +(define_insn "*movabssi_1_rex64" + [(set (mem:SI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:SI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{l}\t{%1, %P0|%P0, %1} + mov{l}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*movabssi_2_rex64" + [(set (match_operand:SI 0 "register_operand" "=a,r") + (mem:SI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{l}\t{%P1, %0|%0, %P1} + mov{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "SI")]) + +(define_insn "*swapsi" + [(set (match_operand:SI 0 "register_operand" "+r") + (match_operand:SI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "" + "xchg{l}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (match_operand:HI 1 "general_operand" ""))] + "" + "ix86_expand_move (HImode, operands); DONE;") + +(define_insn "*pushhi2" + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] + "!TARGET_64BIT" + "push{l}\t%k1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes.
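+;; (Editor's illustration, not in the FSF sources: pushing a HImode value +;; with "push{q}\t%q1" therefore stores a full 8-byte word and adjusts %rsp +;; by 8, matching the 64-bit ABI stack slot size.)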
+(define_insn "*pushhi2_rex64" + [(set (match_operand:HI 0 "push_operand" "=X") + (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +(define_insn "*movhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "general_operand" "r,rn,rm,rn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + /* movzwl is faster than movw on p2 due to partial word stalls, + though not as fast as an aligned movl. */ + return "movz{wl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{w}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "imov") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "0,2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "alternative" "1,2") + (match_operand:HI 1 "aligned_operand" "")) + (const_string "SI") + (and (eq_attr "alternative" "0") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_HIMODE_MATH") + (const_int 0)))) + (const_string "SI") + ] + (const_string "HI")))]) + +;; Stores and loads of ax to arbitrary constant address. 
+;; We fake a second form of the instruction to force reload to load the +;; address into a register when rax is not available +(define_insn "*movabshi_1_rex64" + [(set (mem:HI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:HI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{w}\t{%1, %P0|%P0, %1} + mov{w}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "*movabshi_2_rex64" + [(set (match_operand:HI 0 "register_operand" "=a,r") + (mem:HI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{w}\t{%P1, %0|%0, %P1} + mov{w}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "HI")]) + +(define_insn "*swaphi_1" + [(set (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) + +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10 +(define_insn "*swaphi_2" + [(set (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{w}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "HI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movstricthi" + [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "")) + (match_operand:HI 1 "general_operand" ""))] + "" +{ + if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) + FAIL; + /* Don't generate memory->memory moves, go through a register. */ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[1] = force_reg (HImode, operands[1]); +}) + +(define_insn "*movstricthi_1" + [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r")) + (match_operand:HI 1 "general_operand" "rn,m"))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "mov{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "HI")]) + +(define_insn "*movstricthi_xor" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (match_operand:HI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{w}\t%0, %0" + [(set_attr "type" "alu1") + (set_attr "mode" "HI") + (set_attr "length_immediate" "0")]) + +(define_expand "movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" + "ix86_expand_move (QImode, operands); DONE;") + +;; emit_push_insn when it calls move_by_pieces requires an insn to +;; "push a byte". But actually we use pushl, which has the effect +;; of rounding the amount pushed up to a word.
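+;; (Editor's illustration, not in the FSF sources: on !TARGET_64BIT the +;; QImode push below therefore emits "pushl %k1", e.g. "pushl %eax" to push +;; the byte in %al, moving %esp by 4 rather than 1.)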
+ +(define_insn "*pushqi2" + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))] + "!TARGET_64BIT" + "push{l}\t%k1" + [(set_attr "type" "push") + (set_attr "mode" "SI")]) + +;; For 64BIT abi we always round up to 8 bytes. +(define_insn "*pushqi2_rex64" + [(set (match_operand:QI 0 "push_operand" "=X") + (match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))] + "TARGET_64BIT" + "push{q}\t%q1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +;; The choice between full sized (SImode) and QImode moves is quite tricky. +;; For a Q_REG -> Q_REG move we use the full size only on partial register +;; dependency machines (such as AMD Athlon), where QImode moves issue an +;; extra dependency, and on partial register stall machines that don't use +;; QImode patterns (where a QImode move causes a stall on the next +;; instruction). +;; +;; For loads of Q_REG to NONQ_REG we use full sized moves, except on partial +;; register stall machines, where such a load would cause a stall; there we +;; use movzx instead. +(define_insn "*movqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m") + (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])); + return "movz{bl|x}\t{%1, %k0|%k0, %1}"; + default: + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else + return "mov{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(and (eq_attr "alternative" "5") + (not (match_operand:QI 1 "aligned_operand" ""))) + (const_string "imovx") + (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)) + (const_string "imov") + (and (eq_attr "alternative" "3") + (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0)))) + (const_string "imov") + (eq_attr "alternative" "3,5") + (const_string "imovx") + (and (ne (symbol_ref "TARGET_MOVX") + (const_int 0)) + (eq_attr "alternative" "2")) + (const_string "imovx") + ] + (const_string "imov"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,5") + (const_string "SI") + (eq_attr "alternative" "6") + (const_string "QI") + (eq_attr "type" "imovx") + (const_string "SI") + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (and (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (eq (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)))))) + (const_string "SI") + ;; Avoid partial register stalls when not using QImode arithmetic + (and (eq_attr "type" "imov") + (and (eq_attr "alternative" "0,1") + (and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL") + (const_int 0)) + (eq (symbol_ref "TARGET_QIMODE_MATH") + (const_int 0))))) + (const_string "SI") + ] + (const_string "QI")))]) + +(define_insn "*swapqi_1" + [(set (match_operand:QI 0 "register_operand" "+r") + (match_operand:QI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)" + "xchg{l}\t%k1, %k0" + [(set_attr "type" "imov") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL
is disabled for AMDFAM10 +(define_insn "*swapqi_2" + [(set (match_operand:QI 0 "register_operand" "+q") + (match_operand:QI 1 "register_operand" "+q")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_PARTIAL_REG_STALL" + "xchg{b}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "QI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector")]) + +(define_expand "movstrictqi" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "")) + (match_operand:QI 1 "general_operand" ""))] + "" +{ + if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun)) + FAIL; + /* Don't generate memory->memory moves, go through a register. */ + if (MEM_P (operands[0]) && MEM_P (operands[1])) + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*movstrictqi_1" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (match_operand:QI 1 "general_operand" "*qn,m"))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "mov{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movstrictqi_xor" + [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q")) + (match_operand:QI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + "xor{b}\t%0, %0" + [(set_attr "type" "alu1") + (set_attr "mode" "QI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsi_extv_1" + [(set (match_operand:SI 0 "register_operand" "=R") + (sign_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movhi_extv_1" + [(set (match_operand:HI 0 "register_operand" "=R") + (sign_extract:HI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movs{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extv_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?r") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extv_1_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (sign_extract:QI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movs{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +;; Stores and loads of ax to arbitrary constant address. 
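+;; (Only the accumulator has a single-instruction form with a full 64-bit
+;; absolute address, e.g. "movabs 0x123456789abc, %al" as a load; any
+;; other register needs the address materialized in a register first.)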
+;; We fake an second form of instruction to force reload to load address +;; into register when rax is not available +(define_insn "*movabsqi_1_rex64" + [(set (mem:QI (match_operand:DI 0 "x86_64_movabs_operand" "i,r")) + (match_operand:QI 1 "nonmemory_operand" "a,er"))] + "TARGET_64BIT && ix86_check_movabs (insn, 0)" + "@ + movabs{b}\t{%1, %P0|%P0, %1} + mov{b}\t{%1, %a0|%a0, %1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0,*") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "*movabsqi_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=a,r") + (mem:QI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))] + "TARGET_64BIT && ix86_check_movabs (insn, 1)" + "@ + movabs{b}\t{%P1, %0|%0, %P1} + mov{b}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "imov") + (set_attr "modrm" "0,*") + (set_attr "length_address" "8,0") + (set_attr "length_immediate" "0") + (set_attr "memory" "load") + (set_attr "mode" "QI")]) + +(define_insn "*movdi_extzv_1" + [(set (match_operand:DI 0 "register_operand" "=R") + (zero_extract:DI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "TARGET_64BIT" + "movz{bl|x}\t{%h1, %k0|%k0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "*movsi_extzv_1" + [(set (match_operand:SI 0 "register_operand" "=R") + (zero_extract:SI (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))] + "" + "movz{bl|x}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*movqi_extzv_2" + [(set (match_operand:QI 0 "nonimmediate_operand" "=Qm,?R") + (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (and (match_operand:QI 0 "register_operand" "") + (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0)))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "*movqi_extzv_2_rex64" + [(set (match_operand:QI 0 "register_operand" "=Q,?R") + (subreg:QI (zero_extract:SI (match_operand 1 "ext_register_operand" "Q,Q") + (const_int 8) + (const_int 8)) 0))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + return "movz{bl|x}\t{%h1, %k0|%k0, %h1}"; + default: + return "mov{b}\t{%h1, %0|%0, %h1}"; + } +} + [(set (attr "type") + (if_then_else (ior (not (match_operand:QI 0 "q_regs_operand" "")) + (ne (symbol_ref "TARGET_MOVX") + (const_int 0))) + (const_string "imovx") + (const_string "imov"))) + (set (attr "mode") + (if_then_else (eq_attr "type" "imovx") + (const_string "SI") + (const_string "QI")))]) + +(define_insn "movsi_insv_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "general_operand" "Qmn"))] + "!TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movsi_insv_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:SI 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + 
[(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "movdi_insv_1_rex64" + [(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (match_operand:DI 1 "nonmemory_operand" "Qn"))] + "TARGET_64BIT" + "mov{b}\t{%b1, %h0|%h0, %b1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_insn "*movqi_insv_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q") + (const_int 8) + (const_int 8)) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "Q") + (const_int 8)))] + "" + "mov{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "" + "ix86_expand_move (DImode, operands); DONE;") + +(define_insn "*pushdi" + [(set (match_operand:DI 0 "push_operand" "=<") + (match_operand:DI 1 "general_no_elim_operand" "riF*m"))] + "!TARGET_64BIT" + "#") + +(define_insn "*pushdi2_rex64" + [(set (match_operand:DI 0 "push_operand" "=<,!<") + (match_operand:DI 1 "general_no_elim_operand" "re*m,n"))] + "TARGET_64BIT" + "@ + push{q}\t%1 + #" + [(set_attr "type" "push,multi") + (set_attr "mode" "DI")]) + +;; Convert impossible pushes of immediate to existing instructions. +;; First try to get scratch register and go through it. In case this +;; fails, push sign extended lower part first and then overwrite +;; upper part by 32bit move. +(define_peephole2 + [(match_scratch:DI 2 "r") + (set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; We need to define this as both peepholer and splitter for case +;; peephole2 pass is not run. +;; "&& 1" is needed to keep it from matching the previous pattern. +(define_peephole2 + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode) && 1" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + "split_di (&operands[1], 1, &operands[2], &operands[3]); + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); + ") + +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 2) (match_dup 3))] + "split_di (&operands[1], 1, &operands[2], &operands[3]); + operands[1] = gen_lowpart (DImode, operands[2]); + operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx, + GEN_INT (4))); + ") + +(define_insn "*pushdi2_prologue_rex64" + [(set (match_operand:DI 0 "push_operand" "=<") + (match_operand:DI 1 "general_no_elim_operand" "re*m")) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "push{q}\t%1" + [(set_attr "type" "push") + (set_attr "mode" "DI")]) + +(define_insn "*popdi1_epilogue_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") + (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) + (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "pop{q}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "DI")]) + +(define_insn "popdi1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r*m") + (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) + (plus:DI (reg:DI SP_REG) (const_int 8)))] + "TARGET_64BIT" + "pop{q}\t%0" + [(set_attr "type" "pop") + (set_attr "mode" "DI")]) + +(define_insn "*movdi_xor_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "const0_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && reload_completed" + "xor{l}\t%k0, %k0"; + [(set_attr "type" "alu1") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movdi_or_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "const_int_operand" "i")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && reload_completed + && operands[1] == constm1_rtx" +{ + operands[1] = constm1_rtx; + return "or{q}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "alu1") + (set_attr "mode" "DI") + (set_attr "length_immediate" "1")]) + +(define_insn "*movdi_2" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r ,o ,*y,m*y,*y,*Y2,m ,*Y2,*Y2,*x,m ,*x,*x") + (match_operand:DI 1 "general_operand" + "riFo,riF,C ,*y ,m ,C ,*Y2,*Y2,m ,C ,*x,*x,m "))] + "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + # + # + pxor\t%0, %0 + movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + %vpxor\t%0, %d0 + %vmovq\t{%1, %0|%0, %1} + %vmovdqa\t{%1, %0|%0, %1} + %vmovq\t{%1, %0|%0, %1} + xorps\t%0, %0 + movlps\t{%1, %0|%0, %1} + movaps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8") + (const_string "vex") + (const_string "orig"))) + (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")]) + +(define_split + [(set (match_operand:DI 0 "push_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && (! MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; %%% This multiword shite has got to go. 
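+;; (Until then: after reload a DImode move that involves no MMX or SSE
+;; register is split by ix86_split_long_move into two SImode moves,
+;; roughly "movl %eax, (%ecx)" followed by "movl %edx, 4(%ecx)" for a
+;; register-pair store; the helper also orders the halves so that an
+;; overlapping destination is not clobbered before it is read.)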
+(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (match_operand:DI 1 "general_operand" ""))] + "!TARGET_64BIT && reload_completed + && (!MMX_REG_P (operands[0]) && !SSE_REG_P (operands[0])) + && (!MMX_REG_P (operands[1]) && !SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*movdi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") + (match_operand:DI 1 "general_operand" + "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_SSECVT: + if (SSE_REG_P (operands[0])) + return "movq2dq\t{%1, %0|%0, %1}"; + else + return "movdq2q\t{%1, %0|%0, %1}"; + + case TYPE_SSEMOV: + if (TARGET_AVX) + { + if (get_attr_mode (insn) == MODE_TI) + return "vmovdqa\t{%1, %0|%0, %1}"; + else + return "vmovq\t{%1, %0|%0, %1}"; + } + + if (get_attr_mode (insn) == MODE_TI) + return "movdqa\t{%1, %0|%0, %1}"; + /* FALLTHRU */ + + case TYPE_MMXMOV: + /* Moves from and into integer register is done using movd + opcode with REX prefix. */ + if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) + return "movd\t{%1, %0|%0, %1}"; + return "movq\t{%1, %0|%0, %1}"; + + case TYPE_SSELOG1: + return "%vpxor\t%0, %d0"; + + case TYPE_MMX: + return "pxor\t%0, %0"; + + case TYPE_MULTI: + return "#"; + + case TYPE_LEA: + return "lea{q}\t{%a1, %0|%0, %a1}"; + + default: + gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "mov{l}\t{%k1, %k0|%k0, %k1}"; + else if (which_alternative == 2) + return "movabs{q}\t{%1, %0|%0, %1}"; + else + return "mov{q}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "5") + (const_string "mmx") + (eq_attr "alternative" "6,7,8,9,10") + (const_string "mmxmov") + (eq_attr "alternative" "11") + (const_string "sselog1") + (eq_attr "alternative" "12,13,14,15,16") + (const_string "ssemov") + (eq_attr "alternative" "17,18") + (const_string "ssecvt") + (eq_attr "alternative" "4") + (const_string "multi") + (match_operand:DI 1 "pic_32bit_operand" "") + (const_string "lea") + ] + (const_string "imov"))) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "11,12,13,14,15,16") + (const_string "maybe_vex") + (const_string "orig"))) + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")]) + +;; Stores and loads of ax to arbitrary constant address. 
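+;; (The accumulator alternative uses the 64-bit moffs encoding, which has
+;; no ModRM byte and an 8-byte absolute address; that is why the first
+;; alternative in the patterns below sets modrm to 0 and length_address
+;; to 8.)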
+
+;; We fake a second form of the instruction to force reload to load the
+;; address into a register when %rax is not available.
+(define_insn "*movabsdi_1_rex64"
+  [(set (mem:DI (match_operand:DI 0 "x86_64_movabs_operand" "i,r"))
+	(match_operand:DI 1 "nonmemory_operand" "a,er"))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 0)"
+  "@
+   movabs{q}\t{%1, %P0|%P0, %1}
+   mov{q}\t{%1, %a0|%a0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0,*")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+(define_insn "*movabsdi_2_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=a,r")
+	(mem:DI (match_operand:DI 1 "x86_64_movabs_operand" "i,r")))]
+  "TARGET_64BIT && ix86_check_movabs (insn, 1)"
+  "@
+   movabs{q}\t{%P1, %0|%0, %P1}
+   mov{q}\t{%a1, %0|%0, %a1}"
+  [(set_attr "type" "imov")
+   (set_attr "modrm" "0,*")
+   (set_attr "length_address" "8,0")
+   (set_attr "length_immediate" "0")
+   (set_attr "memory" "load")
+   (set_attr "mode" "DI")])
+
+;; Convert impossible stores of immediates to existing instructions.
+;; First try to get a scratch register and go through it.  In case this
+;; fails, move by 32-bit parts.
+(define_peephole2
+  [(match_scratch:DI 2 "r")
+   (set (match_operand:DI 0 "memory_operand" "")
+	(match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode)"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; We need to define this as both a peephole and a splitter, in case the
+;; peephole2 pass is not run.
+;; "&& 1" is needed to keep it from matching the previous pattern.
+(define_peephole2
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && !symbolic_operand (operands[1], DImode)
+   && !x86_64_immediate_operand (operands[1], DImode) && 1"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))]
+  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(match_operand:DI 1 "immediate_operand" ""))]
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+		    ? 
epilogue_completed : reload_completed) + && !symbolic_operand (operands[1], DImode) + && !x86_64_immediate_operand (operands[1], DImode)" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] + "split_di (&operands[0], 2, &operands[2], &operands[4]);") + +(define_insn "*swapdi_rex64" + [(set (match_operand:DI 0 "register_operand" "+r") + (match_operand:DI 1 "register_operand" "+r")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_64BIT" + "xchg{q}\t%1, %0" + [(set_attr "type" "imov") + (set_attr "mode" "DI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "double")]) + +(define_expand "movoi" + [(set (match_operand:OI 0 "nonimmediate_operand" "") + (match_operand:OI 1 "general_operand" ""))] + "TARGET_AVX" + "ix86_expand_move (OImode, operands); DONE;") + +(define_insn "*movoi_internal" + [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:OI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_AVX + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + return "vxorps\t%0, %0, %0"; + case 1: + case 2: + if (misaligned_operand (operands[0], OImode) + || misaligned_operand (operands[1], OImode)) + return "vmovdqu\t{%1, %0|%0, %1}"; + else + return "vmovdqa\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "vex") + (set_attr "mode" "OI")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "nonimmediate_operand" ""))] + "TARGET_SSE || TARGET_64BIT" +{ + if (TARGET_64BIT) + ix86_expand_move (TImode, operands); + else if (push_operand (operands[0], TImode)) + ix86_expand_push (TImode, operands[1]); + else + ix86_expand_vector_move (TImode, operands); + DONE; +}) + +(define_insn "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m") + (match_operand:TI 1 "vector_move_operand" "C,xm,x"))] + "TARGET_SSE && !TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 1: + case 2: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. 
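+	 Use the unaligned forms (movups/movdqu) in that case, since the
+	 aligned forms fault on a misaligned 16-byte access.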
*/ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "sselog1,ssemov,ssemov") + (set_attr "prefix" "maybe_vex") + (set (attr "mode") + (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))) + (const_string "V4SF") + (and (eq_attr "alternative" "2") + (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0))) + (const_string "V4SF")] + (const_string "TI")))]) + +(define_insn "*movti_rex64" + [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm") + (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))] + "TARGET_64BIT + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + return "#"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + /* TDmode values are passed as TImode on the stack. Moving them + to stack may result in unaligned memory access. */ + if (misaligned_operand (operands[0], TImode) + || misaligned_operand (operands[1], TImode)) + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovups\t{%1, %0|%0, %1}"; + else + return "%vmovdqu\t{%1, %0|%0, %1}"; + } + else + { + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + } + default: + gcc_unreachable (); + } +} + [(set_attr "type" "*,*,sselog1,ssemov,ssemov") + (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex") + (set (attr "mode") + (cond [(eq_attr "alternative" "2,3") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "4") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed && !SSE_REG_P (operands[0]) + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; This expands to what emit_move_complex would generate if we didn't +;; have a movti pattern. Having this avoids problems with reload on +;; 32-bit targets when SSE is present, but doesn't seem to be harmful +;; to have around all the time. 
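+;; (CDImode is a complex value made of two DImode parts; the expander
+;; below just moves, or pushes, the real and imaginary halves
+;; separately.)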
+(define_expand "movcdi" + [(set (match_operand:CDI 0 "nonimmediate_operand" "") + (match_operand:CDI 1 "general_operand" ""))] + "" +{ + if (push_operand (operands[0], CDImode)) + emit_move_complex_push (CDImode, operands[0], operands[1]); + else + emit_move_complex_parts (operands[0], operands[1]); + DONE; +}) + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "") + (match_operand:SF 1 "general_operand" ""))] + "" + "ix86_expand_move (SFmode, operands); DONE;") + +(define_insn "*pushsf" + [(set (match_operand:SF 0 "push_operand" "=<,<,<") + (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))] + "!TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{l}\t%1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,SI,SF")]) + +(define_insn "*pushsf_rex64" + [(set (match_operand:SF 0 "push_operand" "=X,X,X") + (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))] + "TARGET_64BIT" +{ + /* Anything else should be already split before reg-stack. */ + gcc_assert (which_alternative == 1); + return "push{q}\t%q1"; +} + [(set_attr "type" "multi,push,multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "SF,DI,SF")]) + +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" ""))] + "reload_completed + && MEM_P (operands[1]) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) + (match_dup 2))]) + + +;; %%% Kill this when call knows how to work this out. +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "any_fp_register_operand" ""))] + "!TARGET_64BIT" + [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4))) + (set (mem:SF (reg:SI SP_REG)) (match_dup 1))]) + +(define_split + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "any_fp_register_operand" ""))] + "TARGET_64BIT" + [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (set (mem:SF (reg:DI SP_REG)) (match_dup 1))]) + +(define_insn "*movsf_1" + [(set (match_operand:SF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r") + (match_operand:SF 1 "general_operand" + "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y ,r ,Yi,r ,*Ym"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], SFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "mov{l}\t{%1, %0|%0, %1}"; + case 5: + if (get_attr_mode (insn) == MODE_TI) + return "%vpxor\t%0, %d0"; + else + return "%vxorps\t%0, %d0"; + case 6: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovss\t{%1, %d0|%d0, %1}"; + case 7: + if (TARGET_AVX) + return REG_P (operands[1]) ? 
"vmovss\t{%1, %0, %0|%0, %0, %1}" + : "vmovss\t{%1, %0|%0, %1}"; + else + return "movss\t{%1, %0|%0, %1}"; + case 8: + return "%vmovss\t{%1, %0|%0, %1}"; + + case 9: case 10: case 14: case 15: + return "movd\t{%1, %0|%0, %1}"; + case 12: case 13: + return "%vmovd\t{%1, %0|%0, %1}"; + + case 11: + return "movq\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "5,6,7,8,12,13") + (const_string "maybe_vex") + (const_string "orig"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "3,4,9,10") + (const_string "SI") + (eq_attr "alternative" "5") + (if_then_else + (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE2") + (const_int 0))) + (eq (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "TI") + (const_string "V4SF")) + /* For architectures resolving dependencies on + whole SSE registers use APS move to break dependency + chains, otherwise use short move to avoid extra work. + + Do the same for architectures resolving dependencies on + the parts. While in DF mode it is better to always handle + just register parts, the SF mode is different due to lack + of instructions to load just part of the register. It is + better to maintain the whole registers in single format + to avoid problems on using packed logical operations. */ + (eq_attr "alternative" "6") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0))) + (const_string "V4SF") + (const_string "SF")) + (eq_attr "alternative" "11") + (const_string "DI")] + (const_string "SF")))]) + +(define_insn "*swapsf" + [(set (match_operand:SF 0 "fp_register_operand" "+f") + (match_operand:SF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "reload_completed || TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "SF")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "" + "ix86_expand_move (DFmode, operands); DONE;") + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 2+2*memory operand size +;; On the average, pushdf using integers can be still shorter. Allow this +;; pattern for optimize_size too. + +(define_insn "*pushdf_nointeger" + [(set (match_operand:DF 0 "push_operand" "=<,<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))] + "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*,*") + (set_attr "mode" "DF,SI,SI,DF")]) + +(define_insn "*pushdf_integer" + [(set (match_operand:DF 0 "push_operand" "=<,<,<") + (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))] + "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "DF,SI,DF")]) + +;; %%% Kill this when call knows how to work this out. 
+(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "any_fp_register_operand" ""))] + "reload_completed" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8))) + (set (mem:DF (reg:P SP_REG)) (match_dup 1))] + "") + +(define_split + [(set (match_operand:DF 0 "push_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Moving is usually shorter when only FP registers are used. This separate +;; movdf pattern avoids the use of integer registers for FP operations +;; when optimizing for size. + +(define_insn "*movdf_nointeger" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,*roF,*Fr,C ,Y2*x,mY2*x,Y2*x"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ((optimize_function_for_size_p (cfun) + || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && !memory_operand (operands[0], DFmode) + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || ((optimize_function_for_size_p (cfun) + || !TARGET_MEMORY_MISMATCH_STALL + || reload_in_progress || reload_completed) + && memory_operand (operands[0], DFmode)))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + return "%vxorpd\t%0, %d0"; + case MODE_TI: + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlpd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlpd\t{%1, %0|%0, %1}"; + } + else + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + if (TARGET_AVX) + { + if (REG_P (operands[0])) + return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovlps\t{%1, %0|%0, %1}"; + } + else + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. 
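+		      (xorpd and pxor both carry a 0x66 prefix that
+		      xorps lacks, hence the size preference.)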
*/ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_insn "*movdf_integer_rex64" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,m ,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + (match_operand:DF 1 "general_operand" + "fm,f,G,rmF,Fr,C ,Y2*x,m ,Y2*x,r ,Yi"))] + "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vxorps\t%0, %d0"; + case MODE_V2DF: + return "%vxorpd\t%0, %d0"; + case MODE_TI: + return "%vpxor\t%0, %d0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "%vmovaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "%vmovapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "%vmovdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "%vmovq\t{%1, %0|%0, %1}"; + case MODE_DF: + if (TARGET_AVX) + { + if (REG_P (operands[0]) && REG_P (operands[1])) + return "vmovsd\t{%1, %0, %0|%0, %0, %1}"; + else + return "vmovsd\t{%1, %0|%0, %1}"; + } + else + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "%vmovlpd\t{%1, %d0|%d0, %1}"; + case MODE_V2SF: + return "%vmovlps\t{%1, %d0|%d0, %1}"; + default: + gcc_unreachable (); + } + + case 9: + case 10: + return "%vmovd\t{%1, %0|%0, %1}"; + + default: + gcc_unreachable(); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov") + (set (attr "prefix") + (if_then_else (eq_attr "alternative" "0,1,2,3,4") + (const_string "orig") + (const_string "maybe_vex"))) + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4,9,10") + (const_string "DI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. 
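+		      (three bytes for xorps against four for xorpd or
+		      pxor.)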
*/ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. */ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_insn "*movdf_integer" + [(set (match_operand:DF 0 "nonimmediate_operand" + "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ") + (match_operand:DF 1 "general_operand" + "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && optimize_function_for_speed_p (cfun) + && TARGET_INTEGER_DFMODE_MOVES + && (reload_in_progress || reload_completed + || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) + || (!(TARGET_SSE2 && TARGET_SSE_MATH) + && optimize_function_for_size_p (cfun) + && standard_80387_constant_p (operands[1])) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], DFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: + case 4: + return "#"; + + case 5: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "xorps\t%0, %0"; + case MODE_V2DF: + return "xorpd\t%0, %0"; + case MODE_TI: + return "pxor\t%0, %0"; + default: + gcc_unreachable (); + } + case 6: + case 7: + case 8: + switch (get_attr_mode (insn)) + { + case MODE_V4SF: + return "movaps\t{%1, %0|%0, %1}"; + case MODE_V2DF: + return "movapd\t{%1, %0|%0, %1}"; + case MODE_TI: + return "movdqa\t{%1, %0|%0, %1}"; + case MODE_DI: + return "movq\t{%1, %0|%0, %1}"; + case MODE_DF: + return "movsd\t{%1, %0|%0, %1}"; + case MODE_V1DF: + return "movlpd\t{%1, %0|%0, %1}"; + case MODE_V2SF: + return "movlps\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } + + default: + gcc_unreachable(); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,1,2") + (const_string "DF") + (eq_attr "alternative" "3,4") + (const_string "SI") + + /* For SSE1, we have many fewer alternatives. */ + (eq (symbol_ref "TARGET_SSE2") (const_int 0)) + (cond [(eq_attr "alternative" "5,6") + (const_string "V4SF") + ] + (const_string "V2SF")) + + /* xorps is one byte shorter. */ + (eq_attr "alternative" "5") + (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") + (const_int 0)) + (const_string "TI") + ] + (const_string "V2DF")) + + /* For architectures resolving dependencies on + whole SSE registers use APD move to break dependency + chains, otherwise use short move to avoid extra work. + + movaps encodes one byte shorter. 
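+	     (movaps is encoded as 0F 28, while movapd takes an extra
+	     0x66 prefix.)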
*/ + (eq_attr "alternative" "6") + (cond + [(ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") + (const_int 0)) + (const_string "V2DF") + ] + (const_string "DF")) + /* For architectures resolving dependencies on register + parts we may avoid extra work to zero out upper part + of register. */ + (eq_attr "alternative" "7") + (if_then_else + (ne (symbol_ref "TARGET_SSE_SPLIT_REGS") + (const_int 0)) + (const_string "V1DF") + (const_string "DF")) + ] + (const_string "DF")))]) + +(define_split + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (match_operand:DF 1 "general_operand" ""))] + "reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! (ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_insn "*swapdf" + [(set (match_operand:DF 0 "fp_register_operand" "+f") + (match_operand:DF 1 "fp_register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "reload_completed || TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "DF")]) + +(define_expand "movxf" + [(set (match_operand:XF 0 "nonimmediate_operand" "") + (match_operand:XF 1 "general_operand" ""))] + "" + "ix86_expand_move (XFmode, operands); DONE;") + +;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size. +;; Size of pushdf using integer instructions is 3+3*memory operand size +;; Pushing using integer instructions is longer except for constants +;; and direct memory references. +;; (assuming that any given constant is pushed only once, but this ought to be +;; handled elsewhere). + +(define_insn "*pushxf_nointeger" + [(set (match_operand:XF 0 "push_operand" "=X,X,X") + (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))] + "optimize_function_for_size_p (cfun)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*,*") + (set_attr "mode" "XF,SI,SI")]) + +(define_insn "*pushxf_integer" + [(set (match_operand:XF 0 "push_operand" "=<,<") + (match_operand:XF 1 "general_no_elim_operand" "f,ro"))] + "optimize_function_for_speed_p (cfun)" +{ + /* This insn should be already split before reg-stack. */ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "i387,*") + (set_attr "mode" "XF,SI")]) + +(define_split + [(set (match_operand 0 "push_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && (GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == DFmode) + && !ANY_FP_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_split + [(set (match_operand:XF 0 "push_operand" "") + (match_operand:XF 1 "any_fp_register_operand" ""))] + "" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2))) + (set (mem:XF (reg:P SP_REG)) (match_dup 1))] + "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? 
-16 : -12);") + +;; Do not use integer registers when optimizing for size +(define_insn "*movxf_nointeger" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))] + "optimize_function_for_size_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || standard_80387_constant_p (operands[1]) + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_insn "*movxf_integer" + [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o") + (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))] + "optimize_function_for_speed_p (cfun) + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && (reload_in_progress || reload_completed + || GET_CODE (operands[1]) != CONST_DOUBLE + || memory_operand (operands[0], XFmode))" +{ + switch (which_alternative) + { + case 0: + case 1: + return output_387_reg_move (insn, operands); + + case 2: + return standard_80387_constant_opcode (operands[1]); + + case 3: case 4: + return "#"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "fmov,fmov,fmov,multi,multi") + (set_attr "mode" "XF,XF,XF,SI,SI")]) + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand" "") + (match_operand:TF 1 "nonimmediate_operand" ""))] + "TARGET_SSE2" +{ + ix86_expand_move (TFmode, operands); + DONE; +}) + +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o") + (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))] + "TARGET_SSE2 + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (which_alternative) + { + case 0: + case 1: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vmovaps\t{%1, %0|%0, %1}"; + else + return "%vmovdqa\t{%1, %0|%0, %1}"; + case 2: + if (get_attr_mode (insn) == MODE_V4SF) + return "%vxorps\t%0, %d0"; + else + return "%vpxor\t%0, %d0"; + case 3: + case 4: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "ssemov,ssemov,sselog1,*,*") + (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*") + (set (attr "mode") + (cond [(eq_attr "alternative" "0,2") + (if_then_else + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0)) + (const_string "V4SF") + (const_string "TI")) + (eq_attr "alternative" "1") + (if_then_else + (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES") + (const_int 0)) + (ne (symbol_ref "optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "V4SF") + (const_string "TI"))] + (const_string "DI")))]) + +(define_insn "*pushtf_sse" + [(set (match_operand:TF 0 "push_operand" "=<,<,<") + (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))] + "TARGET_SSE2" +{ + /* This insn should be already split before reg-stack. 
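+     (If one survives to reg-stack conversion unsplit, the
+     gcc_unreachable below flags the bug.)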
*/ + gcc_unreachable (); +} + [(set_attr "type" "multi") + (set_attr "unit" "sse,*,*") + (set_attr "mode" "TF,SI,SI")]) + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "general_operand" ""))] + "TARGET_SSE2 && reload_completed + && !SSE_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_split + [(set (match_operand:TF 0 "push_operand" "") + (match_operand:TF 1 "any_fp_register_operand" ""))] + "TARGET_SSE2" + [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16))) + (set (mem:TF (reg:P SP_REG)) (match_dup 1))] + "") + +(define_split + [(set (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "general_operand" ""))] + "reload_completed + && !(MEM_P (operands[0]) && MEM_P (operands[1])) + && GET_MODE (operands[0]) == XFmode + && ! (ANY_FP_REG_P (operands[0]) || + (GET_CODE (operands[0]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[0])))) + && ! (ANY_FP_REG_P (operands[1]) || + (GET_CODE (operands[1]) == SUBREG + && ANY_FP_REG_P (SUBREG_REG (operands[1]))))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "memory_operand" ""))] + "reload_completed + && MEM_P (operands[1]) + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == SFmode + || GET_MODE (operands[0]) == DFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (!standard_80387_constant_p (c)) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (float_extend (match_operand 1 "memory_operand" "")))] + "reload_completed + && MEM_P (operands[1]) + && (GET_MODE (operands[0]) == TFmode + || GET_MODE (operands[0]) == XFmode + || GET_MODE (operands[0]) == SFmode + || GET_MODE (operands[0]) == DFmode) + && (operands[2] = find_constant_src (insn))" + [(set (match_dup 0) (match_dup 2))] +{ + rtx c = operands[2]; + rtx r = operands[0]; + + if (GET_CODE (r) == SUBREG) + r = SUBREG_REG (r); + + if (SSE_REG_P (r)) + { + if (!standard_sse_constant_p (c)) + FAIL; + } + else if (FP_REG_P (r)) + { + if (!standard_80387_constant_p (c)) + FAIL; + } + else if (MMX_REG_P (r)) + FAIL; +}) + +(define_insn "swapxf" + [(set (match_operand:XF 0 "register_operand" "+f") + (match_operand:XF 1 "register_operand" "+f")) + (set (match_dup 1) + (match_dup 0))] + "TARGET_80387" +{ + if (STACK_TOP_P (operands[0])) + return "fxch\t%1"; + else + return "fxch\t%0"; +} + [(set_attr "type" "fxch") + (set_attr "mode" "XF")]) + +;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (match_operand:X87MODEF 1 "immediate_operand" ""))] + "reload_completed && FP_REGNO_P (REGNO (operands[0])) + && (standard_80387_constant_p (operands[1]) == 8 + || standard_80387_constant_p (operands[1]) == 9)" + [(set (match_dup 0)(match_dup 1)) + (set (match_dup 0) + (neg:X87MODEF (match_dup 0)))] +{ + REAL_VALUE_TYPE r; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + if (real_isnegzero (&r)) + operands[1] = CONST0_RTX (mode); + else + operands[1] = CONST1_RTX (mode); +}) + +(define_split + [(set (match_operand:TF 0 "nonimmediate_operand" "") + 
(match_operand:TF 1 "general_operand" ""))] + "reload_completed + && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + +;; Zero extension instructions + +(define_expand "zero_extendhisi2" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))] + "" +{ + if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + { + operands[1] = force_reg (HImode, operands[1]); + emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "zero_extendhisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && TARGET_ZERO_EXTEND_WITH_AND + && optimize_function_for_speed_p (cfun)" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "*zero_extendhisi2_movzwl" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "!TARGET_ZERO_EXTEND_WITH_AND + || optimize_function_for_size_p (cfun)" + "movz{wl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_expand "zero_extendqihi2" + [(parallel + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*zero_extendqihi2_and" + [(set (match_operand:HI 0 "register_operand" "=r,?&q") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "HI")]) + +(define_insn "*zero_extendqihi2_movzbw_and" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "HI")]) + +; zero extend to SImode here to avoid partial register stalls +(define_insn "*zero_extendqihi2_movzbl" + [(set (match_operand:HI 0 "register_operand" "=r") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && reload_completed" + "movz{bl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; For the movzbw case strip only the clobber +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND + || optimize_function_for_size_p (cfun)) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))]) + +;; When source and destination does not overlap, clear destination +;; first and then do the movb +(define_split + [(set (match_operand:HI 0 
"register_operand" "") + (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (TARGET_ZERO_EXTEND_WITH_AND + && optimize_function_for_speed_p (cfun)) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (match_dup 0) (const_int 0)) + (set (strict_low_part (match_dup 2)) (match_dup 1))] + "operands[2] = gen_lowpart (QImode, operands[0]);") + +;; Rest is handled by single and. +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (zero_extend:HI (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:HI (match_dup 0) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_expand "zero_extendqisi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*zero_extendqisi2_and" + [(set (match_operand:SI 0 "register_operand" "=r,?&q") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extendqisi2_movzbw_and" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)" + "#" + [(set_attr "type" "imovx,alu1") + (set_attr "mode" "SI")]) + +(define_insn "*zero_extendqisi2_movzbw" + [(set (match_operand:SI 0 "register_operand" "=r") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && reload_completed" + "movz{bl|x}\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; For the movzbl case strip only the clobber +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)) + && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))" + [(set (match_dup 0) + (zero_extend:SI (match_dup 1)))]) + +;; When source and destination does not overlap, clear destination +;; first and then do the movb +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1])) + && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)) + && !reg_overlap_mentioned_p (operands[0], operands[1])" + [(set (match_dup 0) (const_int 0)) + (set (strict_low_part (match_dup 2)) (match_dup 1))] + "operands[2] = gen_lowpart (QImode, operands[0]);") + +;; Rest is handled by single and. 
+(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:QI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 255))) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; %%% Kill me once multi-word ops are sane. +(define_expand "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))] + "" +{ + if (!TARGET_64BIT) + { + emit_insn (gen_zero_extendsidi2_32 (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "zero_extendsidi2_32" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r ,m ,r ,m"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "@ + # + # + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")]) + +(define_insn "zero_extendsidi2_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2") + (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "rm,0,r ,m ,r ,m")))] + "TARGET_64BIT" + "@ + mov\t{%k1, %k0|%k0, %k1} + # + movd\t{%1, %0|%0, %1} + movd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1} + %vmovd\t{%1, %0|%0, %1}" + [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov") + (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex") + (set_attr "mode" "SI,DI,DI,DI,TI,TI")]) + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (zero_extend:DI (match_dup 0)))] + "TARGET_64BIT" + [(set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (zero_extend:DI (match_operand:SI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed + && !SSE_REG_P (operands[0]) && !MMX_REG_P (operands[0])" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (const_int 0))] + "split_di (&operands[0], 1, &operands[3], &operands[4]);") + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_64BIT" + "movz{wl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))] + "TARGET_64BIT" + "movz{bl|x}\t{%1, %k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "DI")]) + +;; Sign extension instructions + +(define_expand "extendsidi2" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:SI 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 2 ""))])] + "" +{ + if (TARGET_64BIT) + { + emit_insn 
(gen_extendsidi2_rex64 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "*extendsidi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=*A,r,?r,?*o")
+        (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0,r,r")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 "=X,X,X,&r"))]
+  "!TARGET_64BIT"
+  "#")
+
+(define_insn "extendsidi2_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=*a,r")
+        (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "*0,rm")))]
+  "TARGET_64BIT"
+  "@
+   {cltq|cdqe}
+   movs{lq|x}\t{%1,%0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")
+   (set_attr "prefix_0f" "0")
+   (set_attr "modrm" "0,1")])
+
+(define_insn "extendhidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+  "TARGET_64BIT"
+  "movs{wq|x}\t{%1,%0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+(define_insn "extendqidi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (sign_extend:DI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
+  "TARGET_64BIT"
+  "movs{bq|x}\t{%1,%0|%0, %1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
+;; Extend to memory case when the source register dies.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+        (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 2 "register_operand" ""))]
+  "(reload_completed
+    && dead_or_set_p (insn, operands[1])
+    && !reg_mentioned_p (operands[1], operands[0]))"
+  [(set (match_dup 3) (match_dup 1))
+   (parallel [(set (match_dup 1) (ashiftrt:SI (match_dup 1) (const_int 31)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 4) (match_dup 1))]
+  "split_di (&operands[0], 1, &operands[3], &operands[4]);")
+
+;; Extend to memory case when the source register does not die.
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+        (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:SI 2 "register_operand" ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+  emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.  */
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
+      && true_regnum (operands[1]) == AX_REG
+      && true_regnum (operands[2]) == DX_REG)
+    {
+      emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31)));
+    }
+  else
+    {
+      emit_move_insn (operands[2], operands[1]);
+      emit_insn (gen_ashrsi3_31 (operands[2], operands[2], GEN_INT (31)));
+    }
+  emit_move_insn (operands[4], operands[2]);
+  DONE;
+})
+
+;; Extend to register case.  Optimize case where source and destination
+;; registers match and cases where we can use cltd.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+        (sign_extend:DI (match_operand:SI 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_scratch:SI 2 ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  split_di (&operands[0], 1, &operands[3], &operands[4]);
+
+  if (true_regnum (operands[3]) != true_regnum (operands[1]))
+    emit_move_insn (operands[3], operands[1]);
+
+  /* Generate a cltd if possible and doing so is profitable.
*/ + if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD) + && true_regnum (operands[3]) == AX_REG) + { + emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31))); + DONE; + } + + if (true_regnum (operands[4]) != true_regnum (operands[1])) + emit_move_insn (operands[4], operands[1]); + + emit_insn (gen_ashrsi3_31 (operands[4], operands[4], GEN_INT (31))); + DONE; +}) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=*a,r") + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1,%0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "*extendhisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=*a,r") + (zero_extend:DI + (sign_extend:SI (match_operand:HI 1 "nonimmediate_operand" "*0,rm"))))] + "TARGET_64BIT" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cwtl|cwde}"; + default: + return "movs{wl|x}\t{%1,%k0|%k0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "SI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "extendqihi2" + [(set (match_operand:HI 0 "register_operand" "=*a,r") + (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "*0,qm")))] + "" +{ + switch (get_attr_prefix_0f (insn)) + { + case 0: + return "{cbtw|cbw}"; + default: + return "movs{bw|x}\t{%1,%0|%0, %1}"; + } +} + [(set_attr "type" "imovx") + (set_attr "mode" "HI") + (set (attr "prefix_0f") + ;; movsx is short decodable while cwtl is vector decoded. + (if_then_else (and (eq_attr "cpu" "!k6") + (eq_attr "alternative" "0")) + (const_string "0") + (const_string "1"))) + (set (attr "modrm") + (if_then_else (eq_attr "prefix_0f" "0") + (const_string "0") + (const_string "1")))]) + +(define_insn "extendqisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))] + "" + "movs{bl|x}\t{%1,%0|%0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +(define_insn "*extendqisi2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (sign_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm"))))] + "TARGET_64BIT" + "movs{bl|x}\t{%1,%k0|%k0, %1}" + [(set_attr "type" "imovx") + (set_attr "mode" "SI")]) + +;; Conversions between float and double. + +;; These are all no-ops in the model used for the 80387. So just +;; emit moves. + +;; %%% Kill these when call knows how to work out a DFmode push earlier. 
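+;; (Roughly: the dummy insns below never match ("0" condition); they exist
+;; so a push of a float_extend can be formed at all, and the splitters then
+;; rewrite it as an explicit stack-pointer adjustment, e.g. "sub $8, %esp",
+;; followed by an x87 store of the extended value.)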
+(define_insn "*dummy_extendsfdf2"
+  [(set (match_operand:DF 0 "push_operand" "=<")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))]
+  "0"
+  "#")
+
+(define_split
+  [(set (match_operand:DF 0 "push_operand" "")
+        (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
+
+(define_insn "*dummy_extendsfxf2"
+  [(set (match_operand:XF 0 "push_operand" "=<")
+        (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "f")))]
+  "0"
+  "#")
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+        (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_split
+  [(set (match_operand:XF 0 "push_operand" "")
+        (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
+  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
+
+(define_expand "extendsfdf2"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "")
+        (float_extend:DF (match_operand:SF 1 "general_operand" "")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are LEGITIMATE_CONSTANT_P.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+          && standard_80387_constant_p (operands[1]) > 0)
+        {
+          operands[1] = simplify_const_unary_operation
+            (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+          emit_move_insn_1 (operands[0], operands[1]);
+          DONE;
+        }
+      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    }
+})
+
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+   cvtss2sd:
+     unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+     cvtps2pd xmm2,xmm1
+   We do the conversion post reload to avoid producing 128bit spills
+   that might lead to an ICE on a 32bit target.  The sequence is unlikely
+   to be combined anyway.  */
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 2)
+        (float_extend:V2DF
+          (vec_select:V2SF
+            (match_dup 3)
+            (parallel [(const_int 0) (const_int 1)]))))]
+{
+  operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+  operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+  /* Use movss for loading from memory, unpcklps reg, reg for registers.
+     Try to avoid a move when the unpacking can be done in the source.  */
+  if (REG_P (operands[1]))
+    {
+      /* If it is unsafe to overwrite the upper half of the source, we need
+         to move to the destination and unpack there.  */
+      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+           || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+          && true_regnum (operands[0]) != true_regnum (operands[1]))
+        {
+          rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+          emit_move_insn (tmp, operands[1]);
+        }
+      else
+        operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
+      emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
+    }
+  else
+    emit_insn (gen_vec_setv4sf_0 (operands[3],
+                                  CONST0_RTX (V4SFmode), operands[1]));
+})
+
+(define_insn "*extendsfdf2_mixed"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
+        (float_extend:DF
+          (match_operand:SF 1 "nonimmediate_operand" "fm,f,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return output_387_reg_move (insn, operands);
+
+    case 2:
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
+
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "SF,XF,DF")])
+
+(define_insn "*extendsfdf2_sse"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "%vcvtss2sd\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DF")])
+
+(define_insn "*extendsfdf2_i387"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF,XF")])
+
+(define_expand "extend<mode>xf2"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "")
+        (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))]
+  "TARGET_80387"
+{
+  /* ??? Needed for compress_float_constant since all fp constants
+     are LEGITIMATE_CONSTANT_P.  */
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      if (standard_80387_constant_p (operands[1]) > 0)
+        {
+          operands[1] = simplify_const_unary_operation
+            (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
+          emit_move_insn_1 (operands[0], operands[1]);
+          DONE;
+        }
+      operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
+    }
+})
+
+(define_insn "*extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
+        (float_extend:XF
+          (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>,XF")])
+
+;; %%% This seems bad bad news.
+;; This cannot output into an f-reg because there is no way to be sure
+;; of truncating in that case.  Otherwise this is just like a simple move
+;; insn.  So we pretend we can output to a reg in order to get better
+;; register preferencing, but we really use a stack slot.
+
+;; Conversion from DFmode to SFmode.
+
+(define_expand "truncdfsf2"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+{
+  if (TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_MIX_SSE_I387)
+    ;
+  else if (flag_unsafe_math_optimizations)
+    ;
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      rtx temp = assign_386_stack_local (SFmode, slot);
+      emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
+      DONE;
+    }
+})
+
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+   cvtsd2ss:
+     unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+     cvtpd2ps xmm2,xmm1
+   We do the conversion post reload to avoid producing 128bit spills
+   that might lead to an ICE on a 32bit target.  The sequence is unlikely
+   to be combined anyway.  */
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "")))]
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
+   && reload_completed && SSE_REG_P (operands[0])"
+  [(set (match_dup 2)
+        (vec_concat:V4SF
+          (float_truncate:V2SF
+            (match_dup 4))
+          (match_dup 3)))]
+{
+  operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+  operands[3] = CONST0_RTX (V2SFmode);
+  operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+  /* Use movsd for loading from memory, unpcklpd for registers.
+     Try to avoid a move when the unpacking can be done in the source,
+     or when SSE3 movddup is available.  */
+  if (REG_P (operands[1]))
+    {
+      if (!TARGET_SSE3
+          && true_regnum (operands[0]) != true_regnum (operands[1])
+          && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+              || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+        {
+          rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+          emit_move_insn (tmp, operands[1]);
+          operands[1] = tmp;
+        }
+      else if (!TARGET_SSE3)
+        operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+    }
+  else
+    emit_insn (gen_sse2_loadlpd (operands[4],
+                                 CONST0_RTX (V2DFmode), operands[1]));
+})
+
+(define_expand "truncdfsf2_with_temp"
+  [(parallel [(set (match_operand:SF 0 "" "")
+                   (float_truncate:SF (match_operand:DF 1 "" "")))
+              (clobber (match_operand:SF 2 "" ""))])]
+  "")
+
+(define_insn "*truncdfsf_fast_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=fm,x")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f ,xm")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "fmov,ssecvt")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "SF")])
+
+;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
+;; because nothing we do here is unsafe.
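+;; (cvtsd2ss rounds to single precision directly in the register, so no
+;; stack temporary is required; the i387 alternatives can only round by
+;; storing through memory, which is why the f-reg "fast" form above needs
+;; flag_unsafe_math_optimizations.)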
+(define_insn "*truncdfsf_fast_sse"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=x")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_fast_i387"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=fm")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f")))]
+  "TARGET_80387 && flag_unsafe_math_optimizations"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,Y2 ,?f,?x,?*r")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f ,Y2m,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,X ,m ,m ,m"))]
+  "TARGET_MIX_SSE_I387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
+
+    default:
+      return "#";
+    }
+}
+  [(set_attr "type" "fmov,ssecvt,multi,multi,multi")
+   (set_attr "unit" "*,*,i387,i387,i387")
+   (set_attr "prefix" "orig,maybe_vex,orig,orig,orig")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf_i387"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+        (float_truncate:SF
+          (match_operand:DF 1 "nonimmediate_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return output_387_reg_move (insn, operands);
+
+    default:
+      return "#";
+    }
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf2_i387_1"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+        (float_truncate:SF
+          (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_80387
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)
+   && !TARGET_MIX_SSE_I387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "SF")])
+
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF
+          (match_operand:DF 1 "fp_register_operand" "")))
+   (clobber (match_operand 2 "" ""))]
+  "reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));
+})
+
+;; Conversion from XFmode to {SF,DF}mode
+
+(define_expand "truncxf<mode>2"
+  [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "")
+                   (float_truncate:MODEF
+                     (match_operand:XF 1 "register_operand" "")))
+              (clobber (match_dup 2))])]
+  "TARGET_80387"
+{
+  if (flag_unsafe_math_optimizations)
+    {
+      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
+      if (reg != operands[0])
+        emit_move_insn (operands[0], reg);
+      DONE;
+    }
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      operands[2] = assign_386_stack_local (<MODE>mode, slot);
+    }
+})
+
+(define_insn "*truncxfsf2_mixed"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?x,?*r")
+        (float_truncate:SF
+          (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  gcc_assert (!which_alternative);
+  return output_387_reg_move (insn, operands);
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncxfdf2_mixed"
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?Y2,?*r")
+        (float_truncate:DF
+          (match_operand:XF 1 "register_operand" "f ,f ,f ,f")))
+   (clobber (match_operand:DF 2 "memory_operand" "=X,m ,m ,m"))]
+  "TARGET_80387"
+{
+  gcc_assert (!which_alternative);
+  return output_387_reg_move (insn, operands);
+}
+  [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
+   (set_attr "mode" "DF")])
+
+(define_insn "truncxf<mode>2_i387_noop"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387 && flag_unsafe_math_optimizations"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*truncxf<mode>2_i387"
+  [(set (match_operand:MODEF 0 "memory_operand" "=m")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand" "f")))]
+  "TARGET_80387"
+  "* return output_387_reg_move (insn, operands);"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand" "")))
+   (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+  "TARGET_80387 && reload_completed"
+  [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:MODEF 0 "memory_operand" "")
+        (float_truncate:MODEF
+          (match_operand:XF 1 "register_operand" "")))
+   (clobber (match_operand:MODEF 2 "memory_operand" ""))]
+  "TARGET_80387"
+  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]
+  "")
+
+;; Signed conversion to DImode.
+
+(define_expand "fix_truncxfdi2"
+  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+                   (fix:DI (match_operand:XF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+(define_expand "fix_trunc<mode>di2"
+  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
+                   (fix:DI (match_operand:MODEF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
+{
+  if (TARGET_FISTTP
+     && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+    {
+      emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+      DONE;
+    }
+  if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
+    {
+      rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
+      emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
+      if (out != operands[0])
+        emit_move_insn (operands[0], out);
+      DONE;
+    }
+})
+
+;; Signed conversion to SImode.
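+;; (Illustrative: a C cast such as "(int) d" for double d becomes a
+;; cvttsd2si on the SSE path, a fisttp on TARGET_FISTTP, or an fistp
+;; bracketed by the control-word switching machinery further below.)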
+
+(define_expand "fix_truncxfsi2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+                   (fix:SI (match_operand:XF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
+{
+  if (TARGET_FISTTP)
+    {
+      emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_expand "fix_trunc<mode>si2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+                   (fix:SI (match_operand:MODEF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+  if (TARGET_FISTTP
+     && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+    {
+      emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+      DONE;
+    }
+  if (SSE_FLOAT_MODE_P (<MODE>mode))
+    {
+      rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+      emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
+      if (out != operands[0])
+        emit_move_insn (operands[0], out);
+      DONE;
+    }
+})
+
+;; Signed conversion to HImode.
+
+(define_expand "fix_trunc<mode>hi2"
+  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+                   (fix:HI (match_operand:X87MODEF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+  if (TARGET_FISTTP)
+    {
+      emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+;; Unsigned conversion to SImode.
+
+(define_expand "fixuns_trunc<mode>si2"
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+          (unsigned_fix:SI
+            (match_operand:MODEF 1 "nonimmediate_operand" "")))
+     (use (match_dup 2))
+     (clobber (match_scratch:<ssevecmode> 3 ""))
+     (clobber (match_scratch:<ssevecmode> 4 ""))])]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+{
+  enum machine_mode mode = <MODE>mode;
+  enum machine_mode vecmode = <ssevecmode>mode;
+  REAL_VALUE_TYPE TWO31r;
+  rtx two31;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  real_ldexp (&TWO31r, &dconst1, 31);
+  two31 = const_double_from_real_value (TWO31r, mode);
+  two31 = ix86_build_const_vector (mode, true, two31);
+  operands[2] = force_reg (vecmode, two31);
+})
+
+(define_insn_and_split "*fixuns_trunc<mode>_1"
+  [(set (match_operand:SI 0 "register_operand" "=&x,&x")
+        (unsigned_fix:SI
+          (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
+   (use (match_operand:<ssevecmode> 4 "nonimmediate_operand" "m,x"))
+   (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
+   (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && optimize_function_for_speed_p (cfun)"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_convert_uns_si_sse (operands);
+  DONE;
+})
+
+;; Unsigned conversion to HImode.
+;; Without these patterns, we'll try the unsigned SI conversion which
+;; is complex for SSE, rather than the signed SI conversion, which isn't.
+
+(define_expand "fixuns_trunc<mode>hi2"
+  [(set (match_dup 2)
+        (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "")))
+   (set (match_operand:HI 0 "nonimmediate_operand" "")
+        (subreg:HI (match_dup 2) 0))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "operands[2] = gen_reg_rtx (SImode);")
+
+;; When SSE is available, it is always faster to use it!
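+;; (cvttss2si / cvttsd2si truncate toward zero in a single instruction,
+;; with no need for the fnstcw/fldcw rounding-mode dance the i387
+;; patterns below go through.)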
+(define_insn "fix_truncdi_sse" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "TARGET_64BIT && SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "%vcvtts2si{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) + +(define_insn "fix_truncsi_sse" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))] + "SSE_FLOAT_MODE_P (mode) + && (!TARGET_FISTTP || TARGET_SSE_MATH)" + "%vcvtts2si\t{%1, %0|%0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,vector") + (set_attr "amdfam10_decode" "double,double")]) + +;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns. +(define_peephole2 + [(set (match_operand:MODEF 0 "register_operand" "") + (match_operand:MODEF 1 "memory_operand" "")) + (set (match_operand:SSEMODEI24 2 "register_operand" "") + (fix:SSEMODEI24 (match_dup 0)))] + "TARGET_SHORTEN_X87_SSE + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))] + "") + +;; Avoid vector decoded forms of the instruction. +(define_peephole2 + [(match_scratch:DF 2 "Y2") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))] + "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_peephole2 + [(match_scratch:SF 2 "x") + (set (match_operand:SSEMODEI24 0 "register_operand" "") + (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))] + "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))] + "") + +(define_insn_and_split "fix_trunc_fisttp_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" "")))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387_fisttp (operands[0], operands[1])); + else + { + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_fisttp_with_temp (operands[0], + operands[1], + operands[2])); + } + DONE; +} + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp" + [(set (match_operand:X87MODEI 0 "memory_operand" "=m") + (fix:X87MODEI (match_operand 1 "register_operand" "f"))) + (clobber (match_scratch:XF 2 "=&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "* return output_fix_trunc (insn, operands, 1);" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_fisttp_with_temp" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI (match_operand 1 "register_operand" "f,f"))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 3 
"=&1f,&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_FISTTP + && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && TARGET_SSE_MATH)" + "#" + [(set_attr "type" "fisttp") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI 0 "register_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:X87MODEI 0 "memory_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (match_operand:X87MODEI 2 "memory_operand" "")) + (clobber (match_scratch 3 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1))) + (clobber (match_dup 3))])] + "") + +;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description +;; of the machinery. Please note the clobber of FLAGS_REG. In i387 control +;; word calculation (inserted by LCM in mode switching pass) a FLAGS_REG +;; clobbering insns can be used. Look at emit_i387_cw_initialization () +;; function in i386.c. +(define_insn_and_split "*fix_trunc_i387_1" + [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "") + (fix:X87MODEI (match_operand 1 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && (TARGET_64BIT || mode != DImode)) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + if (memory_operand (operands[0], VOIDmode)) + emit_insn (gen_fix_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + else + { + operands[4] = assign_386_stack_local (mode, SLOT_TEMP); + emit_insn (gen_fix_trunc_i387_with_temp (operands[0], operands[1], + operands[2], operands[3], + operands[4])); + } + DONE; +} + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_insn "fix_truncdi_i387" + [(set (match_operand:DI 0 "memory_operand" "=m") + (fix:DI (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m")) + (clobber (match_scratch:XF 4 "=&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_insn "fix_truncdi_i387_with_temp" + [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r") + (fix:DI (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:DI 4 "memory_operand" "=X,m")) + (clobber (match_scratch:XF 5 "=&1f,&1f"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (fix:DI 
(match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:DI 0 "memory_operand" "") + (fix:DI (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:DI 4 "memory_operand" "")) + (clobber (match_scratch 5 ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:DI (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3)) + (clobber (match_dup 5))])] + "") + +(define_insn "fix_trunc_i387" + [(set (match_operand:X87MODEI12 0 "memory_operand" "=m") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f"))) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "* return output_fix_trunc (insn, operands, 0);" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_insn "fix_trunc_i387_with_temp" + [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r") + (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f"))) + (use (match_operand:HI 2 "memory_operand" "m,m")) + (use (match_operand:HI 3 "memory_operand" "m,m")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && !TARGET_FISTTP + && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))" + "#" + [(set_attr "type" "fistp") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEI12 0 "register_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))]) + (set (match_dup 0) (match_dup 4))] + "") + +(define_split + [(set (match_operand:X87MODEI12 0 "memory_operand" "") + (fix:X87MODEI12 (match_operand 1 "register_operand" ""))) + (use (match_operand:HI 2 "memory_operand" "")) + (use (match_operand:HI 3 "memory_operand" "")) + (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))] + "reload_completed" + [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1))) + (use (match_dup 2)) + (use (match_dup 3))])] + "") + +(define_insn "x86_fnstcw_1" + [(set (match_operand:HI 0 "memory_operand" "=m") + (unspec:HI [(reg:HI FPCR_REG)] UNSPEC_FSTCW))] + "TARGET_80387" + "fnstcw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "HI") + (set_attr "unit" "i387")]) + +(define_insn "x86_fldcw_1" + [(set (reg:HI FPCR_REG) + (unspec:HI [(match_operand:HI 0 "memory_operand" "m")] UNSPEC_FLDCW))] + "TARGET_80387" + "fldcw\t%0" + [(set_attr "length" "2") + (set_attr "mode" "HI") + (set_attr "unit" "i387") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +;; Conversion between fixed point and floating point. 
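+;; (Illustrative: for "double f (int i) { return i; }" the SSE patterns
+;; emit a cvtsi2sd, while the i387 patterns must first spill i to the
+;; stack and then fild it, since fild only accepts a memory operand.)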
+ +;; Even though we only accept memory inputs, the backend _really_ +;; wants to be able to do this between registers. + +(define_expand "floathi2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "") + +;; Pre-reload splitter to add memory clobber to the pattern. +(define_insn_and_split "*floathi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" "")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] + "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);") + +(define_insn "*floathi2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:HI 2 "memory_operand" "=m,m"))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "#" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floathi2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "register_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:HI 1 "memory_operand" ""))) + (clobber (match_operand:HI 2 "memory_operand" ""))] + "TARGET_80387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && reload_completed" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +(define_expand "float2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))] + "TARGET_80387 + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") + +;; Pre-reload splitter to add memory clobber to the pattern. 
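+;; (The clobbered slot gives the post-reload splitters somewhere to spill
+;; a general-register input before fild; on the cvtsi2ss/cvtsi2sd paths
+;; the later splits simply drop the clobber again.)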
+(define_insn_and_split "*float2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))] + "((TARGET_80387 + && (!((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387)) + || ((mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && ((mode == SImode + && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS + && optimize_function_for_speed_p (cfun) + && flag_trapping_math) + || !(TARGET_INTER_UNIT_CONVERSIONS + || optimize_function_for_size_p (cfun))))) + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1))) + (clobber (match_dup 2))])] +{ + operands[2] = assign_386_stack_local (mode, SLOT_TEMP); + + /* Avoid store forwarding (partial memory) stall penalty + by passing DImode value through XMM registers. */ + if (mode == DImode && !TARGET_64BIT + && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && optimize_function_for_speed_p (cfun)) + { + emit_insn (gen_floatdi2_i387_with_xmm (operands[0], + operands[1], + operands[2])); + DONE; + } +}) + +(define_insn "*floatsi2_vector_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt") + (set_attr "mode" ",,,,") + (set_attr "unit" "*,i387,*,*,*") + (set_attr "athlon_decode" "*,*,double,direct,double") + (set_attr "amdfam10_decode" "*,*,vector,double,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))] + "TARGET_SSE2 && TARGET_MIX_SSE_I387 + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,sseicvt") + (set_attr "mode" ",") + (set_attr "unit" "i387,*") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_mixed_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387" + "#" + [(set_attr "type" "fmov,multi,sseicvt,sseicvt") + (set_attr "mode" "") + (set_attr "unit" "*,i387,*,*") + (set_attr "athlon_decode" "*,*,double,direct") + (set_attr "amdfam10_decode" "*,*,vector,double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && TARGET_INTER_UNIT_CONVERSIONS + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 
"register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_insn "*float2_mixed_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "@ + fild%z1\t%1 + %vcvtsi2s\t{%1, %d0|%d0, %1} + %vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "fmov,sseicvt,sseicvt") + (set_attr "prefix" "orig,maybe_vex,maybe_vex") + (set_attr "mode" "") + (set_attr "unit" "i387,*,*") + (set_attr "athlon_decode" "*,double,direct") + (set_attr "amdfam10_decode" "*,vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_mixed_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "@ + fild%z1\t%1 + %vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "fmov,sseicvt") + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "*,direct") + (set_attr "amdfam10_decode" "*,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,x") + (float:MODEF + (match_operand:SI 1 "nonimmediate_operand" "r,m,!x"))) + (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" ",,") + (set_attr "athlon_decode" "double,direct,double") + (set_attr "amdfam10_decode" "vector,double,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*floatsi2_vector_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF (match_operand:SI 1 "memory_operand" "m")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), 
operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. */ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_move_insn (operands[2], operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[2])); + } + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:SI 2 "memory_operand" ""))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "register_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + rtx op1 = operands[1]; + + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + + if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + { + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + } + /* We can ignore possible trapping value in the + high part of SSE register for non-trapping math. 
*/ + else if (SSE_REG_P (op1) && !flag_trapping_math) + operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0); + else + gcc_unreachable (); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SI 1 "memory_operand" "")))] + "TARGET_SSE2 && TARGET_SSE_MATH + && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(const_int 0)] +{ + operands[3] = simplify_gen_subreg (mode, operands[0], + mode, 0); + operands[4] = simplify_gen_subreg (V4SImode, operands[0], mode, 0); + + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + emit_insn + (gen_sse2_cvtdq2p (operands[3], operands[4])); + DONE; +}) + +(define_insn "*float2_sse_with_temp" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "#" + [(set_attr "type" "sseicvt") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_sse_interunit" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (float:MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "%vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "double,direct") + (set_attr "amdfam10_decode" "vector,double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_sse_nointerunit" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (float:MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))" + "%vcvtsi2s\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sseicvt") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "double") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun)) + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) 
== SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:MODEF 0 "register_operand" "") + (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "(mode != DImode || TARGET_64BIT) + && SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && reload_completed + && (SSE_REG_P (operands[0]) + || (GET_CODE (operands[0]) == SUBREG + && SSE_REG_P (operands[0])))" + [(set (match_dup 0) (float:MODEF (match_dup 1)))] + "") + +(define_insn "*float2_i387_with_temp" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))] + "TARGET_80387" + "@ + fild%z1\t%1 + #" + [(set_attr "type" "fmov,multi") + (set_attr "mode" "") + (set_attr "unit" "*,i387") + (set_attr "fp_int_src" "true")]) + +(define_insn "*float2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (float:X87MODEF + (match_operand:SSEMODEI24 1 "memory_operand" "m")))] + "TARGET_80387" + "fild%z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] + "") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" ""))) + (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))] + "TARGET_80387 + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +;; Avoid store forwarding (partial memory) stall penalty +;; by passing DImode value through XMM registers. */ + +(define_insn "floatdi2_i387_with_xmm" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (float:X87MODEF + (match_operand:DI 1 "nonimmediate_operand" "m,?r"))) + (clobber (match_scratch:V4SI 3 "=X,x")) + (clobber (match_scratch:V4SI 4 "=X,x")) + (clobber (match_operand:DI 2 "memory_operand" "=X,m"))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && optimize_function_for_speed_p (cfun)" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "") + (set_attr "unit" "i387") + (set_attr "fp_int_src" "true")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:DI 1 "register_operand" ""))) + (clobber (match_scratch:V4SI 3 "")) + (clobber (match_scratch:V4SI 4 "")) + (clobber (match_operand:DI 2 "memory_operand" ""))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && optimize_function_for_speed_p (cfun) + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) (float:X87MODEF (match_dup 2)))] +{ + /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). + Assemble the 64-bit DImode value in an xmm register. 
*/ + emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, operands[1], 0))); + emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode), + gen_rtx_SUBREG (SImode, operands[1], 4))); + emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4])); + + operands[3] = gen_rtx_REG (DImode, REGNO (operands[3])); +}) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (float:X87MODEF (match_operand:DI 1 "memory_operand" ""))) + (clobber (match_scratch:V4SI 3 "")) + (clobber (match_scratch:V4SI 4 "")) + (clobber (match_operand:DI 2 "memory_operand" ""))] + "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES + && !TARGET_64BIT && optimize_function_for_speed_p (cfun) + && reload_completed + && FP_REG_P (operands[0])" + [(set (match_dup 0) (float:X87MODEF (match_dup 1)))] + "") + +;; Avoid store forwarding (partial memory) stall penalty by extending +;; SImode value to DImode through XMM register instead of pushing two +;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES +;; targets benefit from this optimization. Also note that fild +;; loads from memory only. + +(define_insn "*floatunssi2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,f") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" "x,m"))) + (clobber (match_operand:DI 2 "memory_operand" "=m,m")) + (clobber (match_scratch:SI 3 "=X,x"))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE" + "#" + [(set_attr "type" "multi") + (set_attr "mode" "")]) + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "register_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] + "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);") + +(define_split + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "memory_operand" ""))) + (clobber (match_operand:DI 2 "memory_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "!TARGET_64BIT + && TARGET_80387 && TARGET_SSE + && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (float:X87MODEF (match_dup 2)))] +{ + emit_move_insn (operands[3], operands[1]); + operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0); +}) + +(define_expand "floatunssi2" + [(parallel + [(set (match_operand:X87MODEF 0 "register_operand" "") + (unsigned_float:X87MODEF + (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_dup 2)) + (clobber (match_scratch:SI 3 ""))])] + "!TARGET_64BIT + && ((TARGET_80387 && TARGET_SSE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + { + ix86_expand_convert_uns_si_sse (operands[0], operands[1]); + DONE; + } + else + { + int slot = virtuals_instantiated ? 
SLOT_TEMP : SLOT_VIRTUAL; + operands[2] = assign_386_stack_local (DImode, slot); + } +}) + +(define_expand "floatunsdisf2" + [(use (match_operand:SF 0 "register_operand" "")) + (use (match_operand:DI 1 "nonimmediate_operand" ""))] + "TARGET_64BIT && TARGET_SSE_MATH" + "x86_emit_floatuns (operands); DONE;") + +(define_expand "floatunsdidf2" + [(use (match_operand:DF 0 "register_operand" "")) + (use (match_operand:DI 1 "nonimmediate_operand" ""))] + "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK) + && TARGET_SSE2 && TARGET_SSE_MATH" +{ + if (TARGET_64BIT) + x86_emit_floatuns (operands); + else + ix86_expand_convert_uns_didf_sse (operands[0], operands[1]); + DONE; +}) + +;; Add instructions + +;; %%% splits for addditi3 + +(define_expand "addti3" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;") + +(define_insn "*addti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0") + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (plus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (&operands[0], 3, &operands[0], &operands[3]);") + +;; %%% splits for addsidi3 +; [(set (match_operand:DI 0 "nonimmediate_operand" "") +; (plus:DI (match_operand:DI 1 "general_operand" "") +; (zero_extend:DI (match_operand:SI 2 "general_operand" ""))))] + +(define_expand "adddi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" "")))] + "" + "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;") + +(define_insn "*adddi3_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)] + UNSPEC_ADD_CARRY)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 4)) + (match_dup 5))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 3, &operands[0], &operands[3]);") + +(define_insn "adddi3_carry_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (plus:DI (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") + 
(match_operand:DI 1 "nonimmediate_operand" "%0,0")) + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "adc{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "DI")]) + +(define_insn "*adddi3_cc_rex64" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" + "add{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*3_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 2 "" ",m")) + (match_dup 1))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (plusminus:SWI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (, mode, operands)" + "{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*add3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0") + (match_operand:SWI 2 "" "m")) + (match_dup 1))) + (clobber (match_scratch:SWI 0 "="))] + "ix86_binary_operator_ok (PLUS, mode, operands)" + "add{}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "")]) + +(define_insn "*sub3_cconly_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "m,") + (match_operand:SWI 1 "" ",m")) + (match_dup 0)))] + "" + "cmp{}\t{%1, %0|%0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "")]) + +(define_insn "*si3_zext_cc_overflow" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (match_dup 1))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" + "{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "addqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 1 "nonimmediate_operand" "%0,0")) + (match_operand:QI 2 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "adc{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI")]) + +(define_insn "addhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 1 "nonimmediate_operand" "%0,0")) + (match_operand:HI 2 "general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, HImode, operands)" + "adc{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI")]) + +(define_insn "addsi3_carry" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + 
"ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "*addsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 1 "nonimmediate_operand" "%0")) + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "adc{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "*addsi3_cc" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (plus:SI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" + "add{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "addqi3_cc" + [(set (reg:CC FLAGS_REG) + (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qn,qm")] + UNSPEC_ADD_CARRY)) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (PLUS, QImode, operands)" + "add{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" + "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;") + +(define_insn "*lea_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operand:SI 1 "no_seg_address_operand" "p"))] + "!TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_1_rex64" + [(set (match_operand:SI 0 "register_operand" "=r") + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0))] + "TARGET_64BIT" + "lea{l}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (subreg:SI (match_operand:DI 1 "no_seg_address_operand" "p") 0)))] + "TARGET_64BIT" + "lea{l}\t{%a1, %k0|%k0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*lea_2_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "no_seg_address_operand" "p"))] + "TARGET_64BIT" + "lea{q}\t{%a1, %0|%0, %a1}" + [(set_attr "type" "lea") + (set_attr "mode" "DI")]) + +;; The lea patterns for non-Pmodes needs to be matched by several +;; insns converted to real lea by splitters. 
+ +(define_insn_and_split "*lea_general_1" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (match_operand 1 "index_register_operand" "l") + (match_operand 2 "register_operand" "r")) + (match_operand 3 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[2]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + operands[3] = gen_lowpart (Pmode, operands[3]); + pat = gen_rtx_PLUS (Pmode, gen_rtx_PLUS (Pmode, operands[1], operands[2]), + operands[3]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "register_operand" "r")) + (match_operand:SI 3 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_2" + [(set (match_operand 0 "register_operand" "=r") + (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "i")) + (match_operand 3 "nonmemory_operand" "ri")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && (GET_MODE (operands[0]) == GET_MODE (operands[3]) + || GET_MODE (operands[3]) == VOIDmode)" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + pat = gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], operands[2]), + operands[3]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (mult:SI (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "nonmemory_operand" "ri"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); +} + [(set_attr "type" "lea") + (set_attr 
"mode" "SI")]) + +(define_insn_and_split "*lea_general_3" + [(set (match_operand 0 "register_operand" "=r") + (plus (plus (mult (match_operand 1 "index_register_operand" "l") + (match_operand 2 "const248_operand" "i")) + (match_operand 3 "register_operand" "r")) + (match_operand 4 "immediate_operand" "i")))] + "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode + || (TARGET_64BIT && GET_MODE (operands[0]) == SImode)) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && GET_MODE (operands[0]) == GET_MODE (operands[3])" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx pat; + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); + pat = gen_rtx_PLUS (Pmode, + gen_rtx_PLUS (Pmode, gen_rtx_MULT (Pmode, operands[1], + operands[2]), + operands[3]), + operands[4]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*lea_general_3_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (plus:SI (plus:SI (mult:SI + (match_operand:SI 1 "index_register_operand" "l") + (match_operand:SI 2 "const248_operand" "n")) + (match_operand:SI 3 "register_operand" "r")) + (match_operand:SI 4 "immediate_operand" "i"))))] + "TARGET_64BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (plus:DI (mult:DI (match_dup 1) + (match_dup 2)) + (match_dup 3)) + (match_dup 4)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[3] = gen_lowpart (Pmode, operands[3]); + operands[4] = gen_lowpart (Pmode, operands[4]); +} + [(set_attr "type" "lea") + (set_attr "mode" "SI")]) + +(define_insn "*adddi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:DI 2 "x86_64_general_operand" "rme,re,le"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. 
+ (match_operand:DI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (plus:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (plus:DI (match_dup 1) + (match_dup 2)))] + "") + +(define_insn "*adddi_2_rex64" + [(set (reg FLAGS_REG) + (compare + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rme,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (plus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, DImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* ???? We ought to handle there the 32bit case too + - do we need new constraint? */ + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + /* Avoid overflows. */ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + +(define_insn "*adddi_3_rex64" + [(set (reg FLAGS_REG) + (compare (neg:DI (match_operand:DI 2 "x86_64_general_operand" "rme")) + (match_operand:DI 1 "x86_64_general_operand" "%0"))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{q}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{q}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* ???? We ought to handle there the 32bit case too + - do we need new constraint? */ + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + /* Avoid overflows. 
*/
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1)))
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{q}\t{%2, %0|%0, %2}";
+ }
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2; this
+; pattern is then matched. We can't accept a general immediate, because
+; in case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x8000000000000000, since the value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion
+; is valid only for comparisons not depending on it.
+(define_insn "*adddi_4_rex64"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:DI 1 "nonimmediate_operand" "0")
+ (match_operand:DI 2 "x86_64_immediate_operand" "e")))
+ (clobber (match_scratch:DI 0 "=rm"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGCmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if ((INTVAL (operands[2]) == -128
+ || (INTVAL (operands[2]) > 0
+ && INTVAL (operands[2]) != 128))
+ /* Avoid overflows. */
+ && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))))
+ return "sub{q}\t{%2, %0|%0, %2}";
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "add{q}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:DI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "DI")])
+
+(define_insn "*adddi_5_rex64"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+ (match_operand:DI 2 "x86_64_general_operand" "rme"))
+ (const_int 0)))
+ (clobber (match_scratch:DI 0 "=r"))]
+ "TARGET_64BIT
+ && ix86_match_ccmode (insn, CCGOCmode)
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ /* Current assemblers are broken and do not allow @GOTOFF in
+ ought but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ if (operands[2] == const1_rtx)
+ return "inc{q}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{q}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ /* Avoid overflows.
*/ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:DI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "DI")]) + + +(define_insn "*addsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:SI 2 "general_operand" "g,ri,li"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %0|%0, %a2}"; + + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:SI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand 0 "register_operand" "") + (plus (match_operand 1 "register_operand" "") + (match_operand 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat; + /* In -fPIC mode the constructs like (const (unspec [symbol_ref])) + may confuse gen_lowpart. */ + if (GET_MODE (operands[0]) != Pmode) + { + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); + } + operands[0] = gen_lowpart (SImode, operands[0]); + pat = gen_rtx_PLUS (Pmode, operands[1], operands[2]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; It may seem that nonimmediate operand is proper one for operand 1. +;; The addsi_1 pattern allows nonimmediate operand at that place and +;; we take care in ix86_binary_operator_ok to not allow two memory +;; operands so proper swapping will be done in reload. This allow +;; patterns constructed from addsi_1 to match. 
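+;; Illustrative note (not from the original sources): the *_zext
+;; patterns here rely on the x86-64 rule that a 32-bit operation
+;; always clears bits 63..32 of its destination register, so e.g.
+;;
+;;   add{l}  %esi, %edi
+;;
+;; already leaves %rdi equal to the zero_extend:DI of the SImode sum;
+;; no separate extension instruction is needed.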
+(define_insn "addsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") + (match_operand:SI 2 "general_operand" "g,li")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + operands[2] = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0); + return "lea{l}\t{%a2, %k0|%k0, %a2}"; + + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + ; Current assemblers are broken and do not allow @GOTOFF in + ; ought but a memory context. + (match_operand:SI 2 "pic_symbolic_operand" "") + (const_string "lea") + (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + ] + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonmemory_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (zero_extend:DI (subreg:SI (plus:DI (match_dup 1) (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_lowpart (Pmode, operands[2]); +}) + +(define_insn "*addsi_2" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "g,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (plus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*addsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, SImode, operands) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{l}\t%k0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%k0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %k0|%k0, %2}"; + } + return "add{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_3" + [(set (reg FLAGS_REG) + (compare (neg:SI (match_operand:SI 2 "general_operand" "g")) + (match_operand:SI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %0|%0, %2}";
+ }
+ return "add{l}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+;; See comment for addsi_1_zext why we do use nonimmediate_operand
+(define_insn "*addsi_3_zext"
+ [(set (reg FLAGS_REG)
+ (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
+ (match_operand:SI 1 "nonimmediate_operand" "%0")))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+ "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode)
+ && ix86_binary_operator_ok (PLUS, SImode, operands)
+ /* Current assemblers are broken and do not allow @GOTOFF in
+ ought but a memory context. */
+ && ! pic_symbolic_operand (operands[2], VOIDmode)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == const1_rtx)
+ return "inc{l}\t%k0";
+ else
+ {
+ gcc_assert (operands[2] == constm1_rtx);
+ return "dec{l}\t%k0";
+ }
+
+ default:
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */
+ if (CONST_INT_P (operands[2])
+ && (INTVAL (operands[2]) == 128
+ || (INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) != -128)))
+ {
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ return "sub{l}\t{%2, %k0|%k0, %2}";
+ }
+ return "add{l}\t{%2, %k0|%k0, %2}";
+ }
+}
+ [(set (attr "type")
+ (if_then_else (match_operand:SI 2 "incdec_operand" "")
+ (const_string "incdec")
+ (const_string "alu")))
+ (set_attr "mode" "SI")])
+
+; For comparisons against 1, -1 and 128, we may generate better code
+; by converting cmp to add, inc or dec as done by peephole2; this
+; pattern is then matched. We can't accept a general immediate, because
+; in case of overflow the result is messed up.
+; This pattern also doesn't hold for 0x80000000, since the value
+; overflows when negated.
+; Also, the carry flag is reversed compared to cmp, so this conversion
+; is valid only for comparisons not depending on it.
+(define_insn "*addsi_4"
+ [(set (reg FLAGS_REG)
+ (compare (match_operand:SI 1 "nonimmediate_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n")))
+ (clobber (match_scratch:SI 0 "=rm"))]
+ "ix86_match_ccmode (insn, CCGCmode)
+ && (INTVAL (operands[2]) & 0xffffffff) != 0x80000000"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_INCDEC:
+ if (operands[2] == constm1_rtx)
+ return "inc{l}\t%0";
+ else
+ {
+ gcc_assert (operands[2] == const1_rtx);
+ return "dec{l}\t%0";
+ }
+
+ default:
+ gcc_assert (rtx_equal_p (operands[0], operands[1]));
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
+ Exceptions: -128 encodes smaller than 128, so swap sign and op.
*/ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128))) + return "sub{l}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_insn "*addsi_5" + [(set (reg FLAGS_REG) + (compare + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2])) + /* Current assemblers are broken and do not allow @GOTOFF in + ought but a memory context. */ + && ! pic_symbolic_operand (operands[2], VOIDmode)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (operands[2] == const1_rtx) + return "inc{l}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{l}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:SI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + +(define_expand "addhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;") + +;; %%% After Dave's SUBREG_BYTE stuff goes in, re-enable incb %ah +;; type optimizations enabled by define-splits. This is not important +;; for PII, and in fact harmful because of partial register stalls. + +(define_insn "*addhi_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r") + (match_operand:HI 2 "general_operand" "rn,rm,ln"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (eq_attr "alternative" "2") + (const_string "lea") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu")))) + (set_attr "mode" "HI,HI,SI")]) + +(define_insn "*addhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_2" + [(set (reg FLAGS_REG) + (compare + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmn,rn")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (plus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_insn "*addhi_3" + [(set (reg FLAGS_REG) + (compare (neg:HI (match_operand:HI 2 "general_operand" "rmn")) + (match_operand:HI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. 
*/ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +; See comments above addsi_4 for details. +(define_insn "*addhi_4" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (match_scratch:HI 0 "=rm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xffff) != 0x8000" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{w}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if ((INTVAL (operands[2]) == -128 + || (INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) != 128))) + return "sub{w}\t{%2, %0|%0, %2}"; + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "SI")]) + + +(define_insn "*addhi_5" + [(set (reg FLAGS_REG) + (compare + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rmn")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCGOCmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{w}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return "dec{w}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{w}\t{%2, %0|%0, %2}"; + } + return "add{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "HI")]) + +(define_expand "addqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*addqi_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,r") + (match_operand:QI 2 "general_operand" "qn,qmn,rn,ln"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? 
"inc{l}\t%k0" : "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (eq_attr "alternative" "3") + (const_string "lea") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu")))) + (set_attr "mode" "QI,QI,SI,SI")]) + +(define_insn "*addqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + int widen = (which_alternative == 2); + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx); + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. + Exceptions: -128 encodes smaller than 128, so swap sign and op. */ + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + if (widen) + return "sub{l}\t{%2, %k0|%k0, %2}"; + else + return "sub{b}\t{%2, %0|%0, %2}"; + } + if (widen) + return "add{l}\t{%k2, %k0|%k0, %k2}"; + else + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*addqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (plus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qnm"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[1] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[1] == constm1_rtx); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. 
*/ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 0) + { + operands[1] = GEN_INT (-INTVAL (operands[1])); + return "sub{b}\t{%1, %0|%0, %1}"; + } + return "add{b}\t{%1, %0|%0, %1}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 1 "incdec_operand" "") + (const_string "incdec") + (const_string "alu1"))) + (set (attr "memory") + (if_then_else (match_operand 1 "memory_operand" "") + (const_string "load") + (const_string "none"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_2" + [(set (reg FLAGS_REG) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmn,qn")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (plus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_3" + [(set (reg FLAGS_REG) + (compare (neg:QI (match_operand:QI 2 "general_operand" "qmn")) + (match_operand:QI 1 "nonimmediate_operand" "%0"))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCZmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +; See comments above addsi_4 for details. 
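+; A hedged example (constants chosen only for illustration): the
+; flags-only comparison
+;   cmp{b} $-1, %al
+; sets flags from %al - (-1), i.e. %al + 1, so peephole2 may emit the
+; shorter
+;   inc{b} %al
+; instead when %al itself is dead afterwards. Since inc/dec do not
+; write the carry flag and an add of the negated constant complements
+; it, these patterns demand CCGCmode, i.e. users that ignore carry.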
+(define_insn "*addqi_4" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_int_operand" "n"))) + (clobber (match_scratch:QI 0 "=qm"))] + "ix86_match_ccmode (insn, CCGCmode) + && (INTVAL (operands[2]) & 0xff) != 0x80" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == const1_rtx); + return "dec{b}\t%0"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "add{b}\t{%2, %0|%0, %2}"; + } + return "sub{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:HI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + + +(define_insn "*addqi_5" + [(set (reg FLAGS_REG) + (compare + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qmn")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCGOCmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%0"; + } + + default: + /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'. */ + if (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) < 0) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{b}\t{%2, %0|%0, %2}"; + } + return "add{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + + +(define_insn "addqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "Qmn"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string "incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_INCDEC: + if (operands[2] == const1_rtx) + return "inc{b}\t%h0"; + else + { + gcc_assert (operands[2] == constm1_rtx + || (CONST_INT_P (operands[2]) + && INTVAL (operands[2]) == 255)); + return "dec{b}\t%h0"; + } + + default: + return "add{b}\t{%2, %h0|%h0, %2}"; + } +} + [(set (attr "type") + (if_then_else (match_operand:QI 2 "incdec_operand" "") + (const_string 
"incdec") + (const_string "alu"))) + (set_attr "mode" "QI")]) + +(define_insn "*addqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (plus:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "" + "add{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "addxf3" + [(set (match_operand:XF 0 "register_operand" "") + (plus:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "add3" + [(set (match_operand:MODEF 0 "register_operand" "") + (plus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") + +;; Subtract instructions + +;; %%% splits for subditi3 + +(define_expand "subti3" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;") + +(define_insn "*subti3_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0") + (match_operand:TI 2 "x86_64_general_operand" "roe,re"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (minus:TI (match_operand:TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:DI (match_dup 4) + (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (&operands[0], 3, &operands[0], &operands[3]);") + +;; %%% splits for subsidi3 + +(define_expand "subdi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" "")))] + "" + "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;") + +(define_insn "*subdi3_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "general_operand" "roiF,riF"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed" + [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) + (parallel [(set (match_dup 3) + (minus:SI (match_dup 4) + (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 5)))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 3, &operands[0], 
&operands[3]);") + +(define_insn "subdi3_carry_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (plus:DI (match_operand:DI 3 "ix86_carry_flag_operator" "") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sbb{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_2_rex64" + [(set (reg FLAGS_REG) + (compare + (minus:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*subdi_3_rex64" + [(set (reg FLAGS_REG) + (compare (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (minus:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, DImode, operands)" + "sub{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "subqi3_carry" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "") + (match_operand:QI 2 "general_operand" "qn,qm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, QImode, operands)" + "sbb{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "QI")]) + +(define_insn "subhi3_carry" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "") + (match_operand:HI 2 "general_operand" "rn,rm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, HImode, operands)" + "sbb{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "HI")]) + +(define_insn "subsi3_carry" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 2 "general_operand" "ri,rm")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_insn "subsi3_carry_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "") + (match_operand:SI 2 "general_operand" "g"))))) +
(clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sbb{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "mode" "SI")]) + +(define_expand "subsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" + "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;") + +(define_insn "*subsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_2" + [(set (reg FLAGS_REG) + (compare + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_2_zext" + [(set (reg FLAGS_REG) + (compare + (minus:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (minus:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*subsi_3_zext" + [(set (reg FLAGS_REG) + (compare (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "general_operand" "g"))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (minus:SI (match_dup 1) + (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, SImode, operands)" + "sub{l}\t{%2, %1|%1, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_expand "subhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;") + +(define_insn "*subhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 
"general_operand" "rn,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_2" + [(set (reg FLAGS_REG) + (compare + (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "rn,rm")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*subhi_3" + [(set (reg FLAGS_REG) + (compare (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:HI 2 "general_operand" "rn,rm"))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (minus:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, HImode, operands)" + "sub{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "subqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;") + +(define_insn "*subqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (minus:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qm"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "sub{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_2" + [(set (reg FLAGS_REG) + (compare + (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qn,qm")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*subqi_3" + [(set (reg FLAGS_REG) + (compare (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "general_operand" "qn,qm"))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") + (minus:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (MINUS, QImode, operands)" + "sub{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +;; The patterns that match these are at the end of this file. 
+ +(define_expand "subxf3" + [(set (match_operand:XF 0 "register_operand" "") + (minus:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "sub3" + [(set (match_operand:MODEF 0 "register_operand" "") + (minus:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") + +;; Multiply instructions + +(define_expand "muldi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "x86_64_general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +;; On AMDFAM10 +;; IMUL reg64, reg64, imm8 Direct +;; IMUL reg64, mem64, imm8 VectorPath +;; IMUL reg64, reg64, imm32 Direct +;; IMUL reg64, mem64, imm32 VectorPath +;; IMUL reg64, reg64 Direct +;; IMUL reg64, mem64 Direct + +(define_insn "*muldi3_1_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:DI 2 "x86_64_general_operand" "K,e,mr"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{q}\t{%2, %1, %0|%0, %1, %2} + imul{q}\t{%2, %1, %0|%0, %1, %2} + imul{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "DI")]) + +(define_expand "mulsi3" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +;; On AMDFAM10 +;; IMUL reg32, reg32, imm8 Direct +;; IMUL reg32, mem32, imm8 VectorPath +;; IMUL reg32, reg32, imm32 Direct +;; IMUL reg32, mem32, imm32 VectorPath +;; IMUL reg32, reg32 Direct +;; IMUL reg32, mem32 Direct + +(define_insn "*mulsi3_1" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SI 2 "general_operand" "K,i,mr"))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{l}\t{%2, %1, %0|%0, %1, %2} + imul{l}\t{%2, %1, %0|%0, %1, %2} + imul{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "SI")]) + +(define_insn "*mulsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:SI 2 "general_operand" "K,i,mr")))) + 
(clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %1, %k0|%k0, %1, %2} + imul{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1") + (const_string "vector") + (and (eq_attr "alternative" "2") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(and (eq_attr "alternative" "0,1") + (match_operand 1 "memory_operand" "")) + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "SI")]) + +(define_expand "mulhi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "") + +;; On AMDFAM10 +;; IMUL reg16, reg16, imm8 VectorPath +;; IMUL reg16, mem16, imm8 VectorPath +;; IMUL reg16, reg16, imm16 VectorPath +;; IMUL reg16, mem16, imm16 VectorPath +;; IMUL reg16, reg16 Direct +;; IMUL reg16, mem16 Direct +(define_insn "*mulhi3_1" + [(set (match_operand:HI 0 "register_operand" "=r,r,r") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0") + (match_operand:HI 2 "general_operand" "K,n,mr"))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %1, %0|%0, %1, %2} + imul{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "imul") + (set_attr "prefix_0f" "0,0,1") + (set (attr "athlon_decode") + (cond [(eq_attr "cpu" "athlon") + (const_string "vector") + (eq_attr "alternative" "1,2") + (const_string "vector")] + (const_string "direct"))) + (set (attr "amdfam10_decode") + (cond [(eq_attr "alternative" "0,1") + (const_string "vector")] + (const_string "direct"))) + (set_attr "mode" "HI")]) + +(define_expand "mulqi3" + [(parallel [(set (match_operand:QI 0 "register_operand" "") + (mult:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "") + +;;On AMDFAM10 +;; MUL reg8 Direct +;; MUL mem8 Direct + +(define_insn "*mulqi3_1" + [(set (match_operand:QI 0 "register_operand" "=a") + (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "mode" "QI")]) + +(define_expand "umulqihi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (zero_extend:HI + (match_operand:QI 1 "nonimmediate_operand" "")) + (zero_extend:HI + (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "") + +(define_insn "*umulqihi3_1" + [(set (match_operand:HI 0 "register_operand" "=a") + (mult:HI (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0")) + (zero_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{b}\t%2" + [(set_attr 
"type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "mode" "QI")]) + +(define_expand "mulqihi3" + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")) + (sign_extend:HI (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_QIMODE_MATH" + "") + +(define_insn "*mulqihi3_insn" + [(set (match_operand:HI 0 "register_operand" "=a") + (mult:HI (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "%0")) + (sign_extend:HI (match_operand:QI 2 "nonimmediate_operand" "qm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{b}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "direct"))) + (set_attr "amdfam10_decode" "direct") + (set_attr "mode" "QI")]) + +(define_expand "umulditi3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*umulditi3_insn" + [(set (match_operand:TI 0 "register_operand" "=A") + (mult:TI (zero_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0")) + (zero_extend:TI (match_operand:DI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "DI")]) + +;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers +(define_expand "umulsidi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT" + "") + +(define_insn "*umulsidi3_insn" + [(set (match_operand:DI 0 "register_operand" "=A") + (mult:DI (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0")) + (zero_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "SI")]) + +(define_expand "mulditi3" + [(parallel [(set (match_operand:TI 0 "register_operand" "") + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (sign_extend:TI + (match_operand:DI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*mulditi3_insn" + [(set (match_operand:TI 0 "register_operand" "=A") + (mult:TI (sign_extend:TI (match_operand:DI 1 "nonimmediate_operand" "%0")) + (sign_extend:TI (match_operand:DI 2 "nonimmediate_operand" 
"rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "DI")]) + +(define_expand "mulsidi3" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT" + "") + +(define_insn "*mulsidi3_insn" + [(set (match_operand:DI 0 "register_operand" "=A") + (mult:DI (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0")) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "SI")]) + +(define_expand "umuldi3_highpart" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" ""))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*umuldi3_highpart_rex64" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "%a")) + (zero_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{q}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "DI")]) + +(define_expand "umulsi3_highpart" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (zero_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*umulsi3_highpart_insn" + [(set (match_operand:SI 0 "register_operand" "=d") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "SI")]) + +(define_insn "*umulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + 
(lshiftrt:DI + (mult:DI (zero_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (zero_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mul{l}\t%2" + [(set_attr "type" "imul") + (set_attr "length_immediate" "0") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "SI")]) + +(define_expand "smuldi3_highpart" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "")) + (sign_extend:TI + (match_operand:DI 2 "register_operand" ""))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +(define_insn "*smuldi3_highpart_rex64" + [(set (match_operand:DI 0 "register_operand" "=d") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nonimmediate_operand" "%a")) + (sign_extend:TI + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (const_int 64)))) + (clobber (match_scratch:DI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{q}\t%2" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "DI")]) + +(define_expand "smulsi3_highpart" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "")) + (sign_extend:DI + (match_operand:SI 2 "register_operand" ""))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +(define_insn "*smulsi3_highpart_insn" + [(set (match_operand:SI 0 "register_operand" "=d") + (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (sign_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32)))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "SI")]) + +(define_insn "*smulsi3_highpart_zext" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (truncate:SI + (lshiftrt:DI + (mult:DI (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "%a")) + (sign_extend:DI + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (const_int 32))))) + (clobber (match_scratch:SI 3 "=1")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "imul{l}\t%2" + [(set_attr "type" "imul") + (set (attr "athlon_decode") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))) + (set_attr "amdfam10_decode" "double") + (set_attr "mode" "SI")]) + +;; The patterns that match these are at the end of this file. 
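+ +;; For illustration (not from the FSF sources): the highpart patterns +;; above use the one-operand mul/imul forms, which leave the double-width +;; product in the dx:ax register pair; operand 0 is constrained to "=d" +;; and the scratch to "=1" (the ax half) to match that layout. On +;; !TARGET_64BIT with stack arguments, a function such as +;; int high (int a, int b) { return ((long long) a * b) >> 32; } +;; is expected to become roughly +;; movl 4(%esp), %eax +;; imull 8(%esp) +;; movl %edx, %eax +;; (the cdecl argument layout shown is an assumption).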
+ +(define_expand "mulxf3" + [(set (match_operand:XF 0 "register_operand" "") + (mult:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "mul3" + [(set (match_operand:MODEF 0 "register_operand" "") + (mult:MODEF (match_operand:MODEF 1 "register_operand" "") + (match_operand:MODEF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "") + +;; SSE5 scalar multiply/add instructions are defined in sse.md. + + +;; Divide instructions + +(define_insn "divqi3" + [(set (match_operand:QI 0 "register_operand" "=a") + (div:QI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "idiv{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +(define_insn "udivqi3" + [(set (match_operand:QI 0 "register_operand" "=a") + (udiv:QI (match_operand:HI 1 "register_operand" "0") + (match_operand:QI 2 "nonimmediate_operand" "qm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_QIMODE_MATH" + "div{b}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "QI")]) + +;; The patterns that match these are at the end of this file. + +(define_expand "divxf3" + [(set (match_operand:XF 0 "register_operand" "") + (div:XF (match_operand:XF 1 "register_operand" "") + (match_operand:XF 2 "register_operand" "")))] + "TARGET_80387" + "") + +(define_expand "divdf3" + [(set (match_operand:DF 0 "register_operand" "") + (div:DF (match_operand:DF 1 "register_operand" "") + (match_operand:DF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" + "") + +(define_expand "divsf3" + [(set (match_operand:SF 0 "register_operand" "") + (div:SF (match_operand:SF 1 "register_operand" "") + (match_operand:SF 2 "nonimmediate_operand" "")))] + "TARGET_80387 || TARGET_SSE_MATH" +{ + if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swdivsf (operands[0], operands[1], + operands[2], SFmode); + DONE; + } +}) + +;; Remainder instructions. + +(define_expand "divmoddi4" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (mod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "") + +;; Allow to come the parameter in eax or edx to avoid extra moves. +;; Penalize eax case slightly because it results in worse scheduling +;; of code. 
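+;; For illustration (not from the FSF sources): in the _nocltd variant +;; below, alternative 0 takes the dividend in the dx register (operand 2 +;; is tied to the remainder output) and alternative 1, penalized with +;; "?", takes it in ax. The split further down then sign-extends with a +;; mov+sar instead of cqto/cltd, giving roughly +;; movq %rdx, %rax +;; sarq $63, %rdx +;; idivq %rcx +;; (the divisor register shown is an assumption).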
+(define_insn "*divmoddi4_nocltd_rex64" + [(set (match_operand:DI 0 "register_operand" "=&a,?a") + (div:DI (match_operand:DI 2 "register_operand" "1,0") + (match_operand:DI 3 "nonimmediate_operand" "rm,rm"))) + (set (match_operand:DI 1 "register_operand" "=&d,&d") + (mod:DI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmoddi4_cltd_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (div:DI (match_operand:DI 2 "register_operand" "a") + (match_operand:DI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 1 "register_operand" "=&d") + (mod:DI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmoddi_noext_rex64" + [(set (match_operand:DI 0 "register_operand" "=a") + (div:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=d") + (mod:DI (match_dup 1) (match_dup 2))) + (use (match_operand:DI 4 "register_operand" "3")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "idiv{q}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (div:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (mod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel [(set (match_dup 3) + (ashiftrt:DI (match_dup 4) (const_int 63))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:DI (reg:DI 0) (match_dup 2))) + (set (match_dup 3) + (mod:DI (reg:DI 0) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] +{ + /* Avoid use of cltd in favor of a mov+shift. */ + if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun)) + { + if (true_regnum (operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_move_insn (operands[3], operands[1]); + operands[4] = operands[3]; + } + else + { + gcc_assert (!true_regnum (operands[1])); + operands[4] = operands[1]; + } +}) + + +(define_expand "divmodsi4" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" + "") + +;; Allow to come the parameter in eax or edx to avoid extra moves. +;; Penalize eax case slightly because it results in worse scheduling +;; of code. 
+(define_insn "*divmodsi4_nocltd" + [(set (match_operand:SI 0 "register_operand" "=&a,?a") + (div:SI (match_operand:SI 2 "register_operand" "1,0") + (match_operand:SI 3 "nonimmediate_operand" "rm,rm"))) + (set (match_operand:SI 1 "register_operand" "=&d,&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmodsi4_cltd" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 2 "register_operand" "a") + (match_operand:SI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 1 "register_operand" "=&d") + (mod:SI (match_dup 2) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))] + "optimize_function_for_size_p (cfun) || TARGET_USE_CLTD" + "#" + [(set_attr "type" "multi")]) + +(define_insn "*divmodsi_noext" + [(set (match_operand:SI 0 "register_operand" "=a") + (div:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 3 "register_operand" "=d") + (mod:SI (match_dup 1) (match_dup 2))) + (use (match_operand:SI 4 "register_operand" "3")) + (clobber (reg:CC FLAGS_REG))] + "" + "idiv{l}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (div:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 3) + (ashiftrt:SI (match_dup 4) (const_int 31))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) + (div:SI (reg:SI 0) (match_dup 2))) + (set (match_dup 3) + (mod:SI (reg:SI 0) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] +{ + /* Avoid use of cltd in favor of a mov+shift. */ + if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun)) + { + if (true_regnum (operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_move_insn (operands[3], operands[1]); + operands[4] = operands[3]; + } + else + { + gcc_assert (!true_regnum (operands[1])); + operands[4] = operands[1]; + } +}) +;; %%% Split me. 
+(define_insn "divmodhi4" + [(set (match_operand:HI 0 "register_operand" "=a") + (div:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:HI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:HI 3 "register_operand" "=&d") + (mod:HI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_HIMODE_MATH" + "cwtd\;idiv{w}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "udivmoddi4" + [(set (match_operand:DI 0 "register_operand" "=a") + (udiv:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=&d") + (umod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "xor{q}\t%3, %3\;div{q}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "DI")]) + +(define_insn "*udivmoddi4_noext" + [(set (match_operand:DI 0 "register_operand" "=a") + (udiv:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 3 "register_operand" "=d") + (umod:DI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "div{q}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (udiv:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "nonimmediate_operand" ""))) + (set (match_operand:DI 3 "register_operand" "") + (umod:DI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(set (match_dup 3) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:DI (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (umod:DI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "udivmodsi4" + [(set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 3 "register_operand" "=&d") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "" + "xor{l}\t%3, %3\;div{l}\t%2" + [(set_attr "type" "multi") + (set_attr "length_immediate" "0") + (set_attr "mode" "SI")]) + +(define_insn "*udivmodsi4_noext" + [(set (match_operand:SI 0 "register_operand" "=a") + (udiv:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 3 "register_operand" "=d") + (umod:SI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))] + "" + "div{l}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nonimmediate_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (umod:SI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 3) (const_int 0)) + (parallel [(set (match_dup 0) + (udiv:SI (match_dup 1) (match_dup 2))) + (set (match_dup 3) + (umod:SI (match_dup 1) (match_dup 2))) + (use (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_expand "udivmodhi4" + [(set (match_dup 4) (const_int 0)) + (parallel [(set (match_operand:HI 0 "register_operand" "") + (udiv:HI (match_operand:HI 1 "register_operand" "") + (match_operand:HI 2 "nonimmediate_operand" ""))) + 
(set (match_operand:HI 3 "register_operand" "") + (umod:HI (match_dup 1) (match_dup 2))) + (use (match_dup 4)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_HIMODE_MATH" + "operands[4] = gen_reg_rtx (HImode);") + +(define_insn "*udivmodhi_noext" + [(set (match_operand:HI 0 "register_operand" "=a") + (udiv:HI (match_operand:HI 1 "register_operand" "0") + (match_operand:HI 2 "nonimmediate_operand" "rm"))) + (set (match_operand:HI 3 "register_operand" "=d") + (umod:HI (match_dup 1) (match_dup 2))) + (use (match_operand:HI 4 "register_operand" "3")) + (clobber (reg:CC FLAGS_REG))] + "" + "div{w}\t%2" + [(set_attr "type" "idiv") + (set_attr "mode" "HI")]) + +;; We cannot use div/idiv for double division, because it causes +;; "division by zero" on the overflow and that's not what we expect +;; from truncate. Because true (non truncating) double division is +;; never generated, we can't create this insn anyway. +; +;(define_insn "" +; [(set (match_operand:SI 0 "register_operand" "=a") +; (truncate:SI +; (udiv:DI (match_operand:DI 1 "register_operand" "A") +; (zero_extend:DI +; (match_operand:SI 2 "nonimmediate_operand" "rm"))))) +; (set (match_operand:SI 3 "register_operand" "=d") +; (truncate:SI +; (umod:DI (match_dup 1) (zero_extend:DI (match_dup 2))))) +; (clobber (reg:CC FLAGS_REG))] +; "" +; "div{l}\t{%2, %0|%0, %2}" +; [(set_attr "type" "idiv")]) + +;;- Logical AND instructions + +;; On Pentium, "test imm, reg" is pairable only with eax, ax, and al. +;; Note that this excludes ah. + +(define_insn "*testdi_1_rex64" + [(set (reg FLAGS_REG) + (compare + (and:DI (match_operand:DI 0 "nonimmediate_operand" "%!*a,r,!*a,r,rm") + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,Z,e,e,re")) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + test{l}\t{%k1, %k0|%k0, %k1} + test{l}\t{%k1, %k0|%k0, %k1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1} + test{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,0,1,1") + (set_attr "mode" "SI,SI,DI,DI,DI") + (set_attr "pent_pair" "uv,np,uv,np,uv")]) + +(define_insn "testsi_1" + [(set (reg FLAGS_REG) + (compare + (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:SI 1 "general_operand" "i,i,ri")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "test{l}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testsi_ccno_1" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI (match_operand:SI 0 "nonimmediate_operand" "") + (match_operand:SI 1 "nonmemory_operand" "")) + (const_int 0)))] + "" + "") + +(define_insn "*testhi_1" + [(set (reg FLAGS_REG) + (compare (and:HI (match_operand:HI 0 "nonimmediate_operand" "%!*a,r,rm") + (match_operand:HI 1 "general_operand" "n,n,rn")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "test{w}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "HI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ccz_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand" "") + (match_operand:QI 1 "nonmemory_operand" "")) + (const_int 0)))] + "" + "") + +(define_insn "*testqi_1_maybe_si" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" 
"%!*a,q,qm,r") + (match_operand:QI 1 "general_operand" "n,n,qn,n")) + (const_int 0)))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[1]) + && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)" +{ + if (which_alternative == 3) + { + if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) < 0) + operands[1] = GEN_INT (INTVAL (operands[1]) & 0xff); + return "test{l}\t{%1, %k0|%k0, %1}"; + } + return "test{b}\t{%1, %0|%0, %1}"; +} + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1,1") + (set_attr "mode" "QI,QI,QI,SI") + (set_attr "pent_pair" "uv,np,uv,np")]) + +(define_insn "*testqi_1" + [(set (reg FLAGS_REG) + (compare + (and:QI + (match_operand:QI 0 "nonimmediate_operand" "%!*a,q,qm") + (match_operand:QI 1 "general_operand" "n,n,qn")) + (const_int 0)))] + "!(MEM_P (operands[0]) && MEM_P (operands[1])) + && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "test") + (set_attr "modrm" "0,1,1") + (set_attr "mode" "QI") + (set_attr "pent_pair" "uv,np,uv")]) + +(define_expand "testqi_ext_ccno_0" + [(set (reg:CCNO FLAGS_REG) + (compare:CCNO + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "")) + (const_int 0)))] + "" + "") + +(define_insn "*testqi_ext_0" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (match_operand 1 "const_int_operand" "n")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI") + (set_attr "length_immediate" "1") + (set_attr "pent_pair" "np")]) + +(define_insn "*testqi_ext_1" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "general_operand" "Qm"))) + (const_int 0)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 1 "register_operand" "Q"))) + (const_int 0)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +(define_insn "*testqi_ext_2" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 0 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 1 "ext_register_operand" "Q") + (const_int 8) + (const_int 8))) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode)" + "test{b}\t{%h1, %h0|%h0, %h1}" + [(set_attr "type" "test") + (set_attr "mode" "QI")]) + +;; Combine likes to form bit extractions for some tests. Humor it. 
+(define_insn "*testqi_ext_3" + [(set (reg FLAGS_REG) + (compare (zero_extract:SI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + && (GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode) + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") + +(define_insn "*testqi_ext_3_rex64" + [(set (reg FLAGS_REG) + (compare (zero_extract:DI + (match_operand 0 "nonimmediate_operand" "rm") + (match_operand:DI 1 "const_int_operand" "") + (match_operand:DI 2 "const_int_operand" "")) + (const_int 0)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && INTVAL (operands[1]) > 0 + && INTVAL (operands[2]) >= 0 + /* Ensure that resulting mask is zero or sign extended operand. */ + && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 + && INTVAL (operands[1]) > 32)) + && (GET_MODE (operands[0]) == SImode + || GET_MODE (operands[0]) == DImode + || GET_MODE (operands[0]) == HImode + || GET_MODE (operands[0]) == QImode)" + "#") + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(zero_extract + (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)]))] + "ix86_match_ccmode (insn, CCNOmode)" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 2) (const_int 0)]))] +{ + rtx val = operands[2]; + HOST_WIDE_INT len = INTVAL (operands[3]); + HOST_WIDE_INT pos = INTVAL (operands[4]); + HOST_WIDE_INT mask; + enum machine_mode mode, submode; + + mode = GET_MODE (val); + if (MEM_P (val)) + { + /* ??? Combine likes to put non-volatile mem extractions in QImode + no matter the size of the test. So find a mode that works. */ + if (! MEM_VOLATILE_P (val)) + { + mode = smallest_mode_for_size (pos + len, MODE_INT); + val = adjust_address (val, mode, 0); + } + } + else if (GET_CODE (val) == SUBREG + && (submode = GET_MODE (SUBREG_REG (val)), + GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (submode)) + && pos + len <= GET_MODE_BITSIZE (submode) + && GET_MODE_CLASS (submode) == MODE_INT) + { + /* Narrow a paradoxical subreg to prevent partial register stalls. */ + mode = submode; + val = SUBREG_REG (val); + } + else if (mode == HImode && pos + len <= 8) + { + /* Small HImode tests can be converted to QImode. */ + mode = QImode; + val = gen_lowpart (QImode, val); + } + + if (len == HOST_BITS_PER_WIDE_INT) + mask = -1; + else + mask = ((HOST_WIDE_INT)1 << len) - 1; + mask <<= pos; + + operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode)); +}) + +;; Convert HImode/SImode test instructions with immediate to QImode ones. +;; i386 does not allow to encode test with 8bit sign extended immediate, so +;; this is relatively important trick. +;; Do the conversion only post-reload to avoid limiting of the register class +;; to QI regs. 
+(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "reload_completed + && QI_REG_P (operands[2]) + && GET_MODE (operands[2]) != QImode + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~(255 << 8))) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~(127 << 8))))" + [(set (match_dup 0) + (match_op_dup 1 + [(and:SI (zero_extract:SI (match_dup 2) (const_int 8) (const_int 8)) + (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_int_mode (INTVAL (operands[3]) >> 8, SImode);") + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "reload_completed + && GET_MODE (operands[2]) != QImode + && (!REG_P (operands[2]) || ANY_QI_REG_P (operands[2])) + && ((ix86_match_ccmode (insn, CCZmode) + && !(INTVAL (operands[3]) & ~255)) + || (ix86_match_ccmode (insn, CCNOmode) + && !(INTVAL (operands[3]) & ~127)))" + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)]))] + "operands[2] = gen_lowpart (QImode, operands[2]); + operands[3] = gen_lowpart (QImode, operands[3]);") + + +;; %%% This used to optimize known byte-wide and operations to memory, +;; and sometimes to QImode registers. If this is considered useful, +;; it should be done with splitters. + +(define_expand "anddi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (and:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_szext_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (AND, DImode, operands); DONE;") + +(define_insn "*anddi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; + + gcc_assert (CONST_INT_P (operands[2])); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bq|x}\t{%1,%0|%0, %1}"; + else + return "movz{wq|x}\t{%1,%0|%0, %1}"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + if (get_attr_mode (insn) == MODE_SI) + return "and{l}\t{%k2, %k0|%k0, %k2}"; + else + return "and{q}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,alu,imovx") + (set_attr "length_immediate" "*,*,*,0") + (set_attr "mode" "SI,DI,DI,DI")]) + +(define_insn "*anddi_2" + [(set (reg FLAGS_REG) + (compare (and:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm") + (and:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, DImode, operands)" + "@ + and{l}\t{%k2, %k0|%k0, %k2} + and{q}\t{%2, %0|%0, %2} + and{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI,DI,DI")]) + 
+(define_expand "andsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" + "ix86_expand_binary_operator (AND, SImode, operands); DONE;") + +(define_insn "*andsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,r") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:SI 2 "general_operand" "ri,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + { + enum machine_mode mode; + + gcc_assert (CONST_INT_P (operands[2])); + if (INTVAL (operands[2]) == 0xff) + mode = QImode; + else + { + gcc_assert (INTVAL (operands[2]) == 0xffff); + mode = HImode; + } + + operands[1] = gen_lowpart (mode, operands[1]); + if (mode == QImode) + return "movz{bl|x}\t{%1,%0|%0, %1}"; + else + return "movz{wl|x}\t{%1,%0|%0, %1}"; + } + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "and{l}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_dup 0) + (const_int -65536))) + (clobber (reg:CC FLAGS_REG))] + "optimize_function_for_size_p (cfun) || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (HImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -256))) + (clobber (reg:CC FLAGS_REG))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed" + [(set (strict_low_part (match_dup 1)) (const_int 0))] + "operands[1] = gen_lowpart (QImode, operands[0]);") + +(define_split + [(set (match_operand 0 "ext_register_operand" "") + (and (match_dup 0) + (const_int -65281))) + (clobber (reg:CC FLAGS_REG))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed" + [(parallel [(set (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_dup 0) + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]);") + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*andsi_2" + [(set (reg FLAGS_REG) + (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "g,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (and:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*andsi_2_zext" + [(set (reg FLAGS_REG) + (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + 
(match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, SImode, operands)" + "and{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "andhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (AND, HImode, operands); DONE;") + +(define_insn "*andhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r") + (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm") + (match_operand:HI 2 "general_operand" "rn,rm,L"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOVX: + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (INTVAL (operands[2]) == 0xff); + return "movz{bl|x}\t{%b1, %k0|%k0, %b1}"; + + default: + gcc_assert (rtx_equal_p (operands[0], operands[1])); + + return "and{w}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "type" "alu,alu,imovx") + (set_attr "length_immediate" "*,*,0") + (set_attr "mode" "HI,HI,SI")]) + +(define_insn "*andhi_2" + [(set (reg FLAGS_REG) + (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmn,rn")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (and:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, HImode, operands)" + "and{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "andqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (AND, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*andqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qn,qmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (AND, QImode, operands)" + "@ + and{b}\t{%2, %0|%0, %2} + and{b}\t{%2, %0|%0, %2} + and{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*andqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (and:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_2_maybe_si" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmn,qn,n")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_binary_operator_ok (AND, QImode, operands) + && ix86_match_ccmode (insn, + CONST_INT_P (operands[2]) + && INTVAL (operands[2]) >= 0 ? 
CCNOmode : CCZmode)" +{ + if (which_alternative == 2) + { + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0) + operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); + return "and{l}\t{%2, %k0|%k0, %2}"; + } + return "and{b}\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*andqi_2" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmn,qn")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (and:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (AND, QImode, operands)" + "and{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_2_slp" + [(set (reg FLAGS_REG) + (compare (and:QI + (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "nonimmediate_operand" "qmn,qn")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (and:QI (match_dup 0) (match_dup 1)))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "and{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +;; ??? A bug in recog prevents it from recognizing a const_int as an +;; operand to zero_extend in andqi_ext_1. It was checking explicitly +;; for a QImode operand, which of course failed. + +(define_insn "andqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +;; Generated by peephole translating test to and. This shows up +;; often in fp comparisons. 
+ +(define_insn "*andqi_ext_0_cc" + [(set (reg FLAGS_REG) + (compare + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 1) + (const_int 8) + (const_int 8)) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode)" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "and{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*andqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "%0") + (const_int 8) + (const_int 8)) + (zero_extract:SI + (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "" + "and{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +;; Convert wide AND instructions with immediate operand to shorter QImode +;; equivalents when possible. +;; Don't do the splitting with memory operands, since it introduces risk +;; of memory mismatch stalls. We may want to do the splitting for optimizing +;; for size, but that can (should?) be handled by generic code instead. +(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (and:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since AND can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is not set. 
+(define_split + [(set (match_operand 0 "register_operand" "") + (and (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(~INTVAL (operands[2]) & ~255) + && !(INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (and:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") + +;; Logical inclusive OR instructions + +;; %%% This used to optimize known byte-wide and operations to memory. +;; If this is considered useful, it should be done with splitters. + +(define_expand "iordi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (ior:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (IOR, DImode, operands); DONE;") + +(define_insn "*iordi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rme"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*iordi_2_rex64" + [(set (reg FLAGS_REG) + (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (ior:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*iordi_3_rex64" + [(set (reg FLAGS_REG) + (compare (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "x86_64_general_operand" "rem")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, DImode, operands)" + "or{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" + "ix86_expand_binary_operator (IOR, SImode, operands); DONE;") + +(define_insn "*iorsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,g"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +(define_insn "*iorsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" 
"SI")]) + +(define_insn "*iorsi_1_zext_imm" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_2" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "g,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (ior:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; ??? Special case for immediate operand is missing - it is tricky. +(define_insn "*iorsi_2_zext" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_2_zext_imm" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand 2 "x86_64_zext_immediate_operand" "Z")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, SImode, operands)" + "or{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*iorsi_3" + [(set (reg FLAGS_REG) + (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "or{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "iorhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ior:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (IOR, HImode, operands); DONE;") + +(define_insn "*iorhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") + (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (IOR, HImode, operands)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*iorhi_2" + [(set (reg FLAGS_REG) + (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmn,rn")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (ior:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, HImode, operands)" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*iorhi_3" + [(set (reg FLAGS_REG) + (compare (ior:HI (match_operand:HI 1 
"nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rmn")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "or{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "iorqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ior:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (IOR, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*iorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") + (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmn,qn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (IOR, QImode, operands)" + "@ + or{b}\t{%2, %0|%0, %2} + or{b}\t{%2, %0|%0, %2} + or{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*iorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m")) + (ior:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qmn,qn"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "or{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_2" + [(set (reg FLAGS_REG) + (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmn,qn")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (ior:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (IOR, QImode, operands)" + "or{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_2_slp" + [(set (reg FLAGS_REG) + (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qmn,qn")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 0) (match_dup 1)))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "or{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_3" + [(set (reg FLAGS_REG) + (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qmn")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "or{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*iorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (ior:SI + (zero_extract:SI (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "or{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (ior:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode 
((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since OR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. +(define_split + [(set (match_operand 0 "register_operand" "") + (ior (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (ior:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") + +;; Logical XOR instructions + +;; %%% This used to optimize known byte-wide and operations to memory. +;; If this is considered useful, it should be done with splitters. + +(define_expand "xordi3" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (xor:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "x86_64_general_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (XOR, DImode, operands); DONE;") + +(define_insn "*xordi_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "re,rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_binary_operator_ok (XOR, DImode, operands)" + "xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*xordi_2_rex64" + [(set (reg FLAGS_REG) + (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0") + (match_operand:DI 2 "x86_64_general_operand" "rem,re")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm") + (xor:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, DImode, operands)" + "xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_insn "*xordi_3_rex64" + [(set (reg FLAGS_REG) + (compare (xor:DI (match_operand:DI 1 "nonimmediate_operand" "%0") + (match_operand:DI 2 "x86_64_general_operand" "rem")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, DImode, operands)" + "xor{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "DI")]) + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (xor:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" + "ix86_expand_binary_operator (XOR, SImode, operands); DONE;") + +(define_insn "*xorsi_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "ri,rm"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; Add speccase for immediates +(define_insn "*xorsi_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" 
"g")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_1_zext_imm" + [(set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_2" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0") + (match_operand:SI 2 "general_operand" "g,ri")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm") + (xor:SI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +;; See comment for addsi_1_zext why we do use nonimmediate_operand +;; ??? Special case for immediate operand is missing - it is tricky. +(define_insn "*xorsi_2_zext" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_2_zext_imm" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand 2 "x86_64_zext_immediate_operand" "Z")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (xor:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, SImode, operands)" + "xor{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_insn "*xorsi_3" + [(set (reg FLAGS_REG) + (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0") + (match_operand:SI 2 "general_operand" "g")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "xor{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "SI")]) + +(define_expand "xorhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (xor:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (XOR, HImode, operands); DONE;") + +(define_insn "*xorhi_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m") + (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, HImode, operands)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*xorhi_2" + [(set (reg FLAGS_REG) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0") + (match_operand:HI 2 "general_operand" "rmn,rn")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm") + (xor:HI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + 
&& ix86_binary_operator_ok (XOR, HImode, operands)" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_insn "*xorhi_3" + [(set (reg FLAGS_REG) + (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0") + (match_operand:HI 2 "general_operand" "rmn")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "xor{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "HI")]) + +(define_expand "xorqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (XOR, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? +(define_insn "*xorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r") + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") + (match_operand:QI 2 "general_operand" "qmn,qn,rn"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, QImode, operands)" + "@ + xor{b}\t{%2, %0|%0, %2} + xor{b}\t{%2, %0|%0, %2} + xor{l}\t{%k2, %k0|%k0, %k2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI,QI,SI")]) + +(define_insn "*xorqi_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q")) + (xor:QI (match_dup 0) + (match_operand:QI 1 "general_operand" "qn,qmn"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_0" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand 2 "const_int_operand" "n"))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "1") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand:QI 2 "general_operand" "Qm")))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_1_rex64" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extend:SI + (match_operand 2 "ext_register_operand" "Q")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_ext_2" + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_operand 1 
"ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (zero_extract:SI (match_operand 2 "ext_register_operand" "Q") + (const_int 8) + (const_int 8)))) + (clobber (reg:CC FLAGS_REG))] + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))" + "xor{b}\t{%h2, %h0|%h0, %h2}" + [(set_attr "type" "alu") + (set_attr "length_immediate" "0") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_1" + [(set (reg FLAGS_REG) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") + (match_operand:QI 2 "general_operand" "qmn,qn")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm") + (xor:QI (match_dup 1) (match_dup 2)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_binary_operator_ok (XOR, QImode, operands)" + "xor{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_2_slp" + [(set (reg FLAGS_REG) + (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm") + (match_operand:QI 1 "general_operand" "qmn,qn")) + (const_int 0))) + (set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 0) (match_dup 1)))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "xor{b}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_2" + [(set (reg FLAGS_REG) + (compare + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0") + (match_operand:QI 2 "general_operand" "qmn")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "xor{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_ext_1" + [(set (reg FLAGS_REG) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "qmn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))] + "!TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_insn "*xorqi_cc_ext_1_rex64" + [(set (reg FLAGS_REG) + (compare + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "0") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "nonmemory_operand" "Qn")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + "xor{b}\t{%2, %h0|%h0, %2}" + [(set_attr "type" "alu") + (set_attr "mode" "QI")]) + +(define_expand "xorqi_cc_ext_1" + [(parallel [ + (set (reg:CCNO FLAGS_REG) + (compare:CCNO + (xor:SI + (zero_extract:SI + (match_operand 1 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand:QI 2 "general_operand" "")) + (const_int 0))) + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (xor:SI + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8)) + (match_dup 2)))])] + "" + "") + +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "register_operand" "") + (match_operand 2 
"const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~(255 << 8)) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8)) + (xor:SI (zero_extract:SI (match_dup 1) + (const_int 8) (const_int 8)) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[2] = gen_int_mode ((INTVAL (operands[2]) >> 8) & 0xff, SImode);") + +;; Since XOR can be encoded with sign extended immediate, this is only +;; profitable when 7th bit is set. +(define_split + [(set (match_operand 0 "register_operand" "") + (xor (match_operand 1 "general_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && ANY_QI_REG_P (operands[0]) + && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(INTVAL (operands[2]) & ~255) + && (INTVAL (operands[2]) & 128) + && GET_MODE (operands[0]) != QImode" + [(parallel [(set (strict_low_part (match_dup 0)) + (xor:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (QImode, operands[0]); + operands[1] = gen_lowpart (QImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]);") + +;; Negation instructions + +(define_expand "negti2" + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_unary_operator (NEG, TImode, operands); DONE;") + +(define_insn "*negti2_1" + [(set (match_operand:TI 0 "nonimmediate_operand" "=ro") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && ix86_unary_operator_ok (NEG, TImode, operands)" + "#") + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (neg:TI (match_operand:TI 1 "nonimmediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:DI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:DI (match_dup 1)))]) + (parallel + [(set (match_dup 2) + (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 2) + (neg:DI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "split_ti (&operands[0], 2, &operands[0], &operands[2]);") + +(define_expand "negdi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))] + "" + "ix86_expand_unary_operator (NEG, DImode, operands); DONE;") + +(define_insn "*negdi2_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=ro") + (neg:DI (match_operand:DI 1 "general_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT + && ix86_unary_operator_ok (NEG, DImode, operands)" + "#") + +(define_split + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (neg:DI (match_operand:DI 1 "general_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && reload_completed" + [(parallel + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SI (match_dup 1)) (const_int 0))) + (set (match_dup 0) (neg:SI (match_dup 1)))]) + (parallel + [(set (match_dup 2) + (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0)) + (match_dup 3)) + (const_int 0))) + 
(clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_dup 2)
+	  (neg:SI (match_dup 2)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "split_di (&operands[0], 2, &operands[0], &operands[2]);")
+
+(define_insn "*negdi2_1_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(neg:DI (match_operand:DI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+  "neg{q}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "DI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
+
+(define_insn "*negdi2_cmpz_rex64"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (neg:DI (match_operand:DI 1 "nonimmediate_operand" "0"))
+		     (const_int 0)))
+   (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(neg:DI (match_dup 1)))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, DImode, operands)"
+  "neg{q}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "DI")])
+
+
+(define_expand "negsi2"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  ""
+  "ix86_expand_unary_operator (NEG, SImode, operands); DONE;")
+
+(define_insn "*negsi2_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+	(neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; Combine is quite creative about this pattern.
+(define_insn "*negsi2_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lshiftrt:DI (neg:DI (ashift:DI (match_operand:DI 1 "register_operand" "0")
+					(const_int 32)))
+		     (const_int 32)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)"
+  "neg{l}\t%k0"
+  [(set_attr "type" "negnot")
+   (set_attr "mode" "SI")])
+
+;; The problem with neg is that it does not perform (compare x 0),
+;; it really performs (compare 0 x), which leaves us with the zero
+;; flag being the only useful item.
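+;; Concretely: after "negl %eax", ZF is set exactly when the operand was
+;; (and the result is) zero, so je/jne stay valid; but CF is set for any
+;; nonzero input and OF only for the most negative one, so neither the
+;; signed nor the unsigned orderings behave as a true compare against
+;; zero would.  That is why only CCZmode captures are used here.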
+ +(define_insn "*negsi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (neg:SI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*negsi2_cmpz_zext" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (lshiftrt:DI + (neg:DI (ashift:DI + (match_operand:DI 1 "register_operand" "0") + (const_int 32))) + (const_int 32)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (neg:DI (ashift:DI (match_dup 1) + (const_int 32))) + (const_int 32)))] + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" + "neg{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_expand "neghi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_unary_operator (NEG, HImode, operands); DONE;") + +(define_insn "*neghi2_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, HImode, operands)" + "neg{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_insn "*neghi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:HI (match_operand:HI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (neg:HI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, HImode, operands)" + "neg{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_expand "negqi2" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_unary_operator (NEG, QImode, operands); DONE;") + +(define_insn "*negqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + +(define_insn "*negqi2_cmpz" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (neg:QI (match_dup 1)))] + "ix86_unary_operator_ok (NEG, QImode, operands)" + "neg{b}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI")]) + +;; Changing of sign for FP values is doable using integer unit too. 
+ +(define_expand "2" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))] + "TARGET_80387 || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "ix86_expand_fp_absneg_operator (, mode, operands); DONE;") + +(define_insn "*absneg2_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0,x,0,0")])) + (use (match_operand: 2 "nonimmediate_operand" "xm,0,X,X")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)" + "#") + +(define_insn "*absneg2_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r") + (match_operator:MODEF 3 "absneg_operator" + [(match_operand:MODEF 1 "register_operand" "0 ,x,0")])) + (use (match_operand: 2 "register_operand" "xm,0,X")) + (clobber (reg:CC FLAGS_REG))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "#") + +(define_insn "*absneg2_i387" + [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r") + (match_operator:X87MODEF 3 "absneg_operator" + [(match_operand:X87MODEF 1 "register_operand" "0,0")])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "#") + +(define_expand "tf2" + [(set (match_operand:TF 0 "register_operand" "") + (absneg:TF (match_operand:TF 1 "register_operand" "")))] + "TARGET_SSE2" + "ix86_expand_fp_absneg_operator (, TFmode, operands); DONE;") + +(define_insn "*absnegtf2_sse" + [(set (match_operand:TF 0 "register_operand" "=x,x") + (match_operator:TF 3 "absneg_operator" + [(match_operand:TF 1 "register_operand" "0,x")])) + (use (match_operand:TF 2 "nonimmediate_operand" "xm,0")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_SSE2" + "#") + +;; Splitters for fp abs and neg. 
+ +(define_split + [(set (match_operand 0 "fp_register_operand" "") + (match_operator 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(set (match_dup 0) (match_op_dup 1 [(match_dup 0)]))]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "absneg_operator" + [(match_operand 1 "register_operand" "")])) + (use (match_operand 2 "nonimmediate_operand" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed && SSE_REG_P (operands[0])" + [(set (match_dup 0) (match_dup 3))] +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum machine_mode vmode = GET_MODE (operands[2]); + rtx tmp; + + operands[0] = simplify_gen_subreg (vmode, operands[0], mode, 0); + operands[1] = simplify_gen_subreg (vmode, operands[1], mode, 0); + if (operands_match_p (operands[0], operands[2])) + { + tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } + if (GET_CODE (operands[3]) == ABS) + tmp = gen_rtx_AND (vmode, operands[1], operands[2]); + else + tmp = gen_rtx_XOR (vmode, operands[1], operands[2]); + operands[3] = tmp; +}) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operator:SF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand:V4SF 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_lowpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operator:DF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + if (TARGET_64BIT) + { + tmp = gen_lowpart (DImode, operands[0]); + tmp = gen_rtx_ZERO_EXTRACT (DImode, tmp, const1_rtx, GEN_INT (63)); + operands[0] = tmp; + + if (GET_CODE (operands[1]) == ABS) + tmp = const0_rtx; + else + tmp = gen_rtx_NOT (DImode, tmp); + } + else + { + operands[0] = gen_highpart (SImode, operands[0]); + if (GET_CODE (operands[1]) == ABS) + { + tmp = gen_int_mode (0x7fffffff, SImode); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = gen_int_mode (0x80000000, SImode); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + } + operands[1] = tmp; +}) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (match_operator:XF 1 "absneg_operator" [(match_dup 0)])) + (use (match_operand 2 "" "")) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (clobber (reg:CC FLAGS_REG))])] +{ + rtx tmp; + operands[0] = gen_rtx_REG (SImode, + true_regnum (operands[0]) + + (TARGET_64BIT ? 1 : 2)); + if (GET_CODE (operands[1]) == ABS) + { + tmp = GEN_INT (0x7fff); + tmp = gen_rtx_AND (SImode, operands[0], tmp); + } + else + { + tmp = GEN_INT (0x8000); + tmp = gen_rtx_XOR (SImode, operands[0], tmp); + } + operands[1] = tmp; +}) + +;; Conditionalize these after reload. If they match before reload, we +;; lose the clobber and ability to use integer instructions. 
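+;; (The pre-reload patterns above carry a flags clobber precisely so that
+;; the integer and/xor splitters remain a legal choice; the bare x87
+;; fabs/fchs forms below clobber nothing, so matching them too early would
+;; commit the value to the x87 unit.)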
+ +(define_insn "*2_1" + [(set (match_operand:X87MODEF 0 "register_operand" "=f") + (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))] + "TARGET_80387 + && (reload_completed + || !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "")]) + +(define_insn "*extendsfdf2" + [(set (match_operand:DF 0 "register_operand" "=f") + (absneg:DF (float_extend:DF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "DF")]) + +(define_insn "*extendsfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (absneg:XF (float_extend:XF + (match_operand:SF 1 "register_operand" "0"))))] + "TARGET_80387" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +(define_insn "*extenddfxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (absneg:XF (float_extend:XF + (match_operand:DF 1 "register_operand" "0"))))] + "TARGET_80387" + "f" + [(set_attr "type" "fsgn") + (set_attr "mode" "XF")]) + +;; Copysign instructions + +(define_mode_iterator CSGNMODE [SF DF TF]) +(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")]) + +(define_expand "copysign3" + [(match_operand:CSGNMODE 0 "register_operand" "") + (match_operand:CSGNMODE 1 "nonmemory_operand" "") + (match_operand:CSGNMODE 2 "register_operand" "")] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" +{ + ix86_expand_copysign (operands); + DONE; +}) + +(define_insn_and_split "copysign3_const" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x") + (unspec:CSGNMODE + [(match_operand: 1 "vector_move_operand" "xmC") + (match_operand:CSGNMODE 2 "register_operand" "0") + (match_operand: 3 "nonimmediate_operand" "xm")] + UNSPEC_COPYSIGN))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_const (operands); + DONE; +}) + +(define_insn "copysign3_var" + [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x") + (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x") + (match_operand: 4 "nonimmediate_operand" "X,xm,xm,0,0") + (match_operand: 5 "nonimmediate_operand" "0,xm,1,xm,1")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 "=x,x,x,x,x"))] + "(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))" + "#") + +(define_split + [(set (match_operand:CSGNMODE 0 "register_operand" "") + (unspec:CSGNMODE + [(match_operand:CSGNMODE 2 "register_operand" "") + (match_operand:CSGNMODE 3 "register_operand" "") + (match_operand: 4 "" "") + (match_operand: 5 "" "")] + UNSPEC_COPYSIGN)) + (clobber (match_scratch: 1 ""))] + "((SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || (TARGET_SSE2 && (mode == TFmode))) + && reload_completed" + [(const_int 0)] +{ + ix86_split_copysign_var (operands); + DONE; +}) + +;; One complement instructions + +(define_expand "one_cmpldi2" + [(set (match_operand:DI 0 "nonimmediate_operand" "") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "")))] + "TARGET_64BIT" + "ix86_expand_unary_operator (NOT, DImode, operands); DONE;") + +(define_insn "*one_cmpldi2_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, operands)" + "not{q}\t%0" + [(set_attr "type" 
"negnot") + (set_attr "mode" "DI")]) + +(define_insn "*one_cmpldi2_2_rex64" + [(set (reg FLAGS_REG) + (compare (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_dup 1)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, DImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "DI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:DI (match_operand:DI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "nonimmediate_operand" "") + (not:DI (match_dup 3)))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 + [(xor:DI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:DI (match_dup 3) (const_int -1)))])] + "") + +(define_expand "one_cmplsi2" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))] + "" + "ix86_expand_unary_operator (NOT, SImode, operands); DONE;") + +(define_insn "*one_cmplsi2_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")))] + "ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +;; ??? Currently never generated - xor is used instead. +(define_insn "*one_cmplsi2_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_operand:SI 1 "register_operand" "0"))))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)" + "not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "SI")]) + +(define_insn "*one_cmplsi2_2" + [(set (reg FLAGS_REG) + (compare (not:SI (match_operand:SI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (not:SI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:SI 1 "nonimmediate_operand" "") + (not:SI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:SI (match_dup 3) (const_int -1)))])] + "") + +;; ??? Currently never generated - xor is used instead. 
+(define_insn "*one_cmplsi2_2_zext" + [(set (reg FLAGS_REG) + (compare (not:SI (match_operand:SI 1 "register_operand" "0")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (not:SI (match_dup 1))))] + "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, SImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:SI (match_operand:SI 3 "register_operand" "")) + (const_int 0)])) + (set (match_operand:DI 1 "register_operand" "") + (zero_extend:DI (not:SI (match_dup 3))))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:SI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])] + "") + +(define_expand "one_cmplhi2" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_unary_operator (NOT, HImode, operands); DONE;") + +(define_insn "*one_cmplhi2_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")))] + "ix86_unary_operator_ok (NOT, HImode, operands)" + "not{w}\t%0" + [(set_attr "type" "negnot") + (set_attr "mode" "HI")]) + +(define_insn "*one_cmplhi2_2" + [(set (reg FLAGS_REG) + (compare (not:HI (match_operand:HI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (not:HI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NEG, HImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "HI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:HI (match_operand:HI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:HI 1 "nonimmediate_operand" "") + (not:HI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:HI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:HI (match_dup 3) (const_int -1)))])] + "") + +;; %%% Potential partial reg stall on alternative 1. What to do? 
+(define_expand "one_cmplqi2" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_unary_operator (NOT, QImode, operands); DONE;") + +(define_insn "*one_cmplqi2_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")))] + "ix86_unary_operator_ok (NOT, QImode, operands)" + "@ + not{b}\t%0 + not{l}\t%k0" + [(set_attr "type" "negnot") + (set_attr "mode" "QI,SI")]) + +(define_insn "*one_cmplqi2_2" + [(set (reg FLAGS_REG) + (compare (not:QI (match_operand:QI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (not:QI (match_dup 1)))] + "ix86_match_ccmode (insn, CCNOmode) + && ix86_unary_operator_ok (NOT, QImode, operands)" + "#" + [(set_attr "type" "alu1") + (set_attr "mode" "QI")]) + +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(not:QI (match_operand:QI 3 "nonimmediate_operand" "")) + (const_int 0)])) + (set (match_operand:QI 1 "nonimmediate_operand" "") + (not:QI (match_dup 3)))] + "ix86_match_ccmode (insn, CCNOmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(xor:QI (match_dup 3) (const_int -1)) + (const_int 0)])) + (set (match_dup 1) + (xor:QI (match_dup 3) (const_int -1)))])] + "") + +;; Arithmetic shift instructions + +;; DImode shifts are implemented using the i386 "shift double" opcode, +;; which is written as "sh[lr]d[lw] imm,reg,reg/mem". If the shift count +;; is variable, then the count is in %cl and the "imm" operand is dropped +;; from the assembler input. +;; +;; This instruction shifts the target reg/mem as usual, but instead of +;; shifting in zeros, bits are shifted in from reg operand. If the insn +;; is a left shift double, bits are taken from the high order bits of +;; reg, else if the insn is a shift right double, bits are taken from the +;; low order bits of reg. So if %eax is "1234" and %edx is "5678", +;; "shldl $8,%edx,%eax" leaves %edx unchanged and sets %eax to "2345". +;; +;; Since sh[lr]d does not change the `reg' operand, that is done +;; separately, making all shifts emit pairs of shift double and normal +;; shift. Since sh[lr]d does not shift more than 31 bits, and we wish to +;; support a 63 bit shift, each shift where the count is in a reg expands +;; to a pair of shifts, a branch, a shift by 32 and a label. +;; +;; If the shift count is a constant, we need never emit more than one +;; shift pair, instead using moves and sign extension for counts greater +;; than 31. + +(define_expand "ashlti3" + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (ASHIFT, TImode, operands); DONE;") + +;; This pattern must be defined before *ashlti3_1 to prevent +;; combine pass from converting sse2_ashlti3 to *ashlti3_1. 
+ +(define_insn "*avx_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "mode" "TI")]) + +(define_insn "sse2_ashlti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ashift:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "pslldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_insn "*ashlti3_1" + [(set (match_operand:TI 0 "register_operand" "=&r,r") + (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "n,0") + (match_operand:QI 2 "nonmemory_operand" "Oc,Oc"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_64BIT" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashift:TI (match_operand:TI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shld" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") + (ior:DI (ashift:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Jc")) + (lshiftrt:DI (match_operand:DI 1 "register_operand" "r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "shld{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +(define_expand "x86_64_shift_adj_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 64)) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:DI 3 "register_operand" "r") + (match_dup 1)))] + "TARGET_64BIT" + "") + +(define_expand "x86_64_shift_adj_2" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "TARGET_64BIT" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + ix86_expand_clear (operands[1]); + + 
emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFT, DImode, operands); DONE;") + +(define_insn "*ashldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r") + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cJ,M"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{q}\t%0, %0"; + + case TYPE_LEA: + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3); + operands[1] = gen_rtx_MULT (DImode, operands[1], + GEN_INT (1 << INTVAL (operands[2]))); + return "lea{q}\t{%a1, %0|%0, %a1}"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "index_register_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) + (mult:DI (match_dup 1) + (match_dup 2)))] + "operands[2] = gen_int_mode (1 << INTVAL (operands[2]), DImode);") + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
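+;; (E.g. "salq $0, %rax" leaves every flag unchanged, so if a zero
+;; count could ever reach a flag-setting shift pattern, the flags
+;; consumer would read whatever stale flags the previous flag-setting
+;; instruction left behind.)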
+(define_insn "*ashldi3_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashift:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +(define_insn "*ashldi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, DImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{q}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{q}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{q}\t%0"; + else + return "sal{q}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "DI")]) + +(define_insn "*ashldi3_1" + [(set (match_operand:DI 0 "register_operand" "=&r,r") + (ashift:DI (match_operand:DI 1 "reg_or_pm1_operand" "n,0") + (match_operand:QI 2 "nonmemory_operand" "Jc,Jc"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" + [(const_int 0)] + "ix86_split_ashl (operands, operands[3], DImode); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashift:DI (match_operand:DI 1 "nonmemory_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;") + +(define_insn "x86_shld" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") + (ior:SI (ashift:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Ic")) + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "" + "shld{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "SI") + (set_attr "pent_pair" "np") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +(define_expand "x86_shift_adj_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "") + (const_int 32)) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:SI 1 "register_operand" "") + (match_dup 0))) + (set (match_dup 1) + (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0)) + (match_operand:SI 3 "register_operand" "r") + (match_dup 1)))] + "TARGET_CMOVE" + "") + +(define_expand "x86_shift_adj_2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + ix86_expand_clear (operands[1]); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;") + +(define_insn "*ashlsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + gcc_assert (rtx_equal_p (operands[0], operands[1])); + return "add{l}\t%0, %0"; + + case TYPE_LEA: + return "#"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. 
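+;; For example, a shift like "x = y << 3" with x and y in different
+;; registers can become "leal 0(,%eax,8), %edx" rather than a move
+;; followed by "sall $3, %edx"; lea does not write the flags, which is
+;; why the split below can also drop the FLAGS_REG clobber.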
+(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "index_register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1]) + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 4" + [(const_int 0)] +{ + rtx pat; + enum machine_mode mode = GET_MODE (operands[0]); + + if (GET_MODE_SIZE (mode) < 4) + operands[0] = gen_lowpart (SImode, operands[0]); + if (mode != Pmode) + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); + + pat = gen_rtx_MULT (Pmode, operands[1], operands[2]); + if (Pmode != SImode) + pat = gen_rtx_SUBREG (SImode, pat, 0); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], pat)); + DONE; +}) + +;; Rare case of shifting RSP is handled by generating move and shift +(define_split + [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(const_int 0)] +{ + rtx pat, clob; + emit_move_insn (operands[0], operands[1]); + pat = gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_ASHIFT (GET_MODE (operands[0]), + operands[0], operands[2])); + clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clob))); + DONE; +}) + +(define_insn "*ashlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ashift:SI (match_operand:SI 1 "register_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%k0, %k0"; + + case TYPE_LEA: + return "#"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +;; Convert lea to the lea pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (ashift (match_operand 1 "register_operand" "") + (match_operand:QI 2 "const_int_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && reload_completed + && true_regnum (operands[0]) != true_regnum (operands[1])" + [(set (match_dup 0) (zero_extend:DI + (subreg:SI (mult:SI (match_dup 1) + (match_dup 2)) 0)))] +{ + operands[1] = gen_lowpart (Pmode, operands[1]); + operands[2] = gen_int_mode (1 << INTVAL (operands[2]), Pmode); +}) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashlsi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashift:SI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%0"; + else + return "sal{l}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + +(define_insn "*ashlsi3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, SImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{l}\t%k0, %k0"; + + default: + if (REG_P (operands[2])) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{l}\t%k0"; + else + return "sal{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set (attr "type") + (cond [(and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "SI")]) + 
+(define_expand "ashlhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;") + +(define_insn "*ashlhi3_1_lea" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI,SI")]) + +(define_insn "*ashlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "cI"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashlhi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashift:HI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +(define_insn "*ashlhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{w}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{w}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{w}\t%0"; + else + return "sal{w}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "HI")]) + +(define_expand "ashlqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;") + +;; %%% Potential partial reg stall on alternative 2. What to do? 
+ +(define_insn "*ashlqi3_1_lea" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_LEA: + return "#"; + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t%k0, %k0"; + else + return "add{b}\t%0, %0"; + + default: + if (REG_P (operands[2])) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else + return "sal{b}\t{%b2, %0|%0, %b2}"; + } + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "2") + (const_string "lea") + (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI,SI,SI")]) + +(define_insn "*ashlqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "cI,cI"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_PARTIAL_REG_STALL + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1])) + return "add{l}\t%k0, %k0"; + else + return "add{b}\t%0, %0"; + + default: + if (REG_P (operands[2])) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%b2, %k0|%k0, %b2}"; + else + return "sal{b}\t{%b2, %0|%0, %b2}"; + } + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t%0"; + else + return "sal{b}\t%0"; + } + else + { + if (get_attr_mode (insn) == MODE_SI) + return "sal{l}\t{%2, %k0|%k0, %2}"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI,SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashlqi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashift:QI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +(define_insn "*ashlqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(optimize_function_for_size_p (cfun) + || !TARGET_PARTIAL_FLAG_REG_STALL + || (operands[2] == const1_rtx + && (TARGET_SHIFT1 + || TARGET_DOUBLE_WITH_ADD))) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + gcc_assert (operands[2] == const1_rtx); + return "add{b}\t%0, %0"; + + default: + if (REG_P (operands[2])) + return "sal{b}\t{%b2, %0|%0, %b2}"; + else if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "sal{b}\t%0"; + else + return "sal{b}\t{%2, %0|%0, %2}"; + } +} + [(set (attr "type") + (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") + (const_int 0)) + (match_operand 0 "register_operand" "")) + (match_operand 2 "const1_operand" "")) + (const_string "alu") + ] + (const_string "ishift"))) + (set_attr "mode" "QI")]) + +;; See comment above `ashldi3' about how this works. 
+ +(define_expand "ashrti3" + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (ASHIFTRT, TImode, operands); DONE;") + +(define_insn "*ashrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Oc"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_64BIT" + [(const_int 0)] + "ix86_split_ashr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (ashiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;") + +(define_insn "x86_64_shrd" + [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") + (ior:DI (ashiftrt:DI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Jc")) + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (minus:QI (const_int 64) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "shrd{q}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "mode" "DI") + (set_attr "athlon_decode" "vector") + (set_attr "amdfam10_decode" "vector")]) + +(define_expand "ashrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;") + +(define_expand "x86_64_shift_adj_3" + [(use (match_operand:DI 0 "register_operand" "")) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_insn "ashrdi3_63_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0") + (match_operand:DI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && INTVAL (operands[2]) == 63 + && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "@ + {cqto|cqo} + sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" 
"=rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "@ + sar{q}\t{%2, %0|%0, %2} + sar{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrdi3_one_bit_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrdi3_one_bit_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrdi3_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (ashiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)" + "sar{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*ashrdi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" + [(const_int 0)] + "ix86_split_ashr (operands, operands[3], DImode); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;") + +(define_insn "x86_shrd" + [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") + (ior:SI (ashiftrt:SI (match_dup 0) + (match_operand:QI 2 "nonmemory_operand" "Ic")) + (ashift:SI (match_operand:SI 1 "register_operand" "r") + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))] + "" + "shrd{l}\t{%s2%1, %0|%0, %1, %2}" + [(set_attr "type" "ishift") + (set_attr "prefix_0f" "1") + (set_attr "pent_pair" "np") + (set_attr "mode" "SI")]) + +(define_expand "x86_shift_adj_3" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:SI 1 "register_operand" "")) + (use (match_operand:QI 2 "register_operand" ""))] + "" +{ + rtx label = gen_label_rtx (); + rtx tmp; + + emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32))); + + tmp = gen_rtx_REG (CCZmode, FLAGS_REG); + tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, + gen_rtx_LABEL_REF (VOIDmode, label), + pc_rtx); + tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); + JUMP_LABEL (tmp) = label; + + emit_move_insn (operands[0], operands[1]); + emit_insn (gen_ashrsi3_31 (operands[1], operands[1], GEN_INT (31))); + + emit_label (label); + LABEL_NUSES (label) = 1; + + DONE; +}) + +(define_expand "ashrsi3_31" + [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_insn "*ashrsi3_31" + [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG))] + "INTVAL (operands[2]) == 31 + && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + {cltd|cdq} + sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_31_zext" + [(set (match_operand:DI 0 "register_operand" "=*d,r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0") + (match_operand:SI 2 "const_int_operand" "i,i")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) + && INTVAL (operands[2]) == 31 + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + {cltd|cdq} + sar{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "imovx,ishift") + (set_attr "prefix_0f" "0,*") + (set_attr "length_immediate" "0,*") + (set_attr "modrm" "0,1") + (set_attr "mode" "SI")]) + +(define_expand "ashrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;") + +(define_insn "*ashrsi3_1_one_bit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 
"register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*ashrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + sar{l}\t{%2, %0|%0, %2} + sar{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "@ + sar{l}\t{%2, %k0|%k0, %2} + sar{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrsi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*ashrsi3_one_bit_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrsi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (ashiftrt:SI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*ashrsi3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + "sar{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "ashrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;") + +(define_insn "*ashrhi3_1_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "@ + sar{w}\t{%2, %0|%0, %2} + sar{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrhi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_dup 1) (match_dup 2)))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrhi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (ashiftrt:HI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_insn "*ashrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)" + "sar{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "ashrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;") + +(define_insn "*ashrqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "@ + sar{b}\t{%2, %0|%0, %2} + sar{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*ashrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (ashiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + sar{b}\t{%1, %0|%0, %1} + sar{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*ashrqi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashiftrt:QI (match_dup 1) (match_dup 2)))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*ashrqi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*ashrqi3_cmp" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (ashiftrt:QI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*ashrqi3_cconly" + [(set (reg FLAGS_REG) + (compare + (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)" + "sar{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + + +;; Logical shift instructions + +;; See comment above `ashldi3' about how this works. + +(define_expand "lshrti3" + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_64BIT" + "ix86_expand_binary_operator (LSHIFTRT, TImode, operands); DONE;") + +;; This pattern must be defined before *lshrti3_1 to prevent +;; combine pass from converting sse2_lshrti3 to *lshrti3_1. + +(define_insn "*avx_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "x") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix" "vex") + (set_attr "mode" "TI")]) + +(define_insn "sse2_lshrti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_SSE2" +{ + operands[2] = GEN_INT (INTVAL (operands[2]) / 8); + return "psrldq\t{%2, %0|%0, %2}"; +} + [(set_attr "type" "sseishft") + (set_attr "prefix_data16" "1") + (set_attr "mode" "TI")]) + +(define_insn "*lshrti3_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Oc"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "TARGET_64BIT" + [(const_int 0)] + "ix86_split_lshr (operands, operands[3], TImode); DONE;") + +(define_split + [(set (match_operand:TI 0 "register_operand" "") + (lshiftrt:TI (match_operand:TI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;") + +(define_expand "lshrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;") + +(define_insn "*lshrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{q}\t{%2, %0|%0, %2} + shr{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrdi3_cmp_one_bit_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrdi3_cconly_one_bit_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
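+;;
+;; A hypothetical example of the fused form on x86-64:
+;;
+;;   int any_high_bits (unsigned long long x) { return (x >> 17) != 0; }
+;;
+;; can be compiled so that "shrq $17, %rdi" supplies ZF for the branch
+;; or setcc directly and the separate "testq" disappears.  This is only
+;; done when partial flag register stalls are not a concern (see the
+;; TARGET_PARTIAL_FLAG_REG_STALL test below).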
+(define_insn "*lshrdi3_cmp_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (lshiftrt:DI (match_dup 1) (match_dup 2)))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*lshrdi3_cconly_rex64" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_63_operand" "J")) + (const_int 0))) + (clobber (match_scratch:DI 0 "=r"))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + +(define_insn "*lshrdi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "Jc"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "#" + [(set_attr "type" "multi")]) + +;; By default we don't ask for a scratch register, because when DImode +;; values are manipulated, registers are already at a premium. But if +;; we have one handy, we won't turn it away. +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_dup 3)] + "!TARGET_64BIT && TARGET_CMOVE" + [(const_int 0)] + "ix86_split_lshr (operands, operands[3], DImode); DONE;") + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (lshiftrt:DI (match_operand:DI 1 "register_operand" "") + (match_operand:QI 2 "nonmemory_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && ((optimize > 0 && flag_peephole2) + ? 
epilogue_completed : reload_completed)" + [(const_int 0)] + "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;") + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;") + +(define_insn "*lshrsi3_1_one_bit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0")) + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*lshrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{l}\t{%2, %0|%0, %2} + shr{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{l}\t{%2, %k0|%k0, %2} + shr{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
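+;;
+;; For instance (sketch, assuming TARGET_SHIFT1 or optimizing for size):
+;;
+;;   int g (unsigned x) { return (x >> 1) == 0; }
+;;
+;; can use the two-byte short form "shrl %eax" (opcode D1 /5), which
+;; sets ZF as a side effect, instead of the longer shift-by-immediate
+;; encoding followed by a separate test.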
+(define_insn "*lshrsi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (lshiftrt:SI (match_dup 1) (match_dup 2)))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrsi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +(define_insn "*lshrsi3_cmp_one_bit_zext" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t%k0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*lshrsi3_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (lshiftrt:SI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:SI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_insn "*lshrsi3_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] + "TARGET_64BIT + && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{l}\t{%2, %k0|%k0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + +(define_expand "lshrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;") + +(define_insn "*lshrhi3_1_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "@ + shr{w}\t{%2, %0|%0, %2} + shr{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
+(define_insn "*lshrhi3_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_dup 1) (match_dup 2)))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrhi3_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrhi3_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (lshiftrt:HI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_insn "*lshrhi3_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:HI 0 "=r"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "shr{w}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "HI")]) + +(define_expand "lshrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;") + +(define_insn "*lshrqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))" + "shr{b}\t%0" + [(set_attr "type" "ishift1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "@ + shr{b}\t{%2, %0|%0, %2} + shr{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*lshrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (lshiftrt:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + shr{b}\t{%1, %0|%0, %1} + shr{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "ishift1") + (set_attr "mode" "QI")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. +(define_insn "*lshrqi2_one_bit_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_dup 1) (match_dup 2)))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*lshrqi2_one_bit_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" "")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t%0" + [(set_attr "type" "ishift") + (set_attr "length" "2")]) + +;; This pattern can't accept a variable shift count, since shifts by +;; zero don't affect the flags. We assume that shifts by constant +;; zero are optimized away. 
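+;;
+;; A sketch of the QImode case:
+;;
+;;   int u (unsigned char x) { return (x >> 2) == 0; }
+;;
+;; can fold its test into "shrb $2, %al" via the pattern below, while
+;; the _slp (strict_low_part) variants above handle in-place shifts of
+;; a byte value that lives inside a wider live register without
+;; clobbering the upper bytes.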
+(define_insn "*lshrqi2_cmp" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (lshiftrt:QI (match_dup 1) (match_dup 2)))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +(define_insn "*lshrqi2_cconly" + [(set (reg FLAGS_REG) + (compare + (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I")) + (const_int 0))) + (clobber (match_scratch:QI 0 "=q"))] + "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL) + && ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "shr{b}\t{%2, %0|%0, %2}" + [(set_attr "type" "ishift") + (set_attr "mode" "QI")]) + +;; Rotate instructions + +(define_expand "rotldi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATE, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. +(define_insn_and_split "ix86_rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashift:SI (match_dup 4) (match_dup 2)) + (lshiftrt:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashift:SI (match_dup 5) (match_dup 2)) + (lshiftrt:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 1, &operands[4], &operands[5]);") + +(define_insn "*rotlsi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, DImode, operands)" + "rol{q}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotldi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "e,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)" + "@ + rol{q}\t{%2, %0|%0, %2} + rol{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "DI")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 
2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;") + +(define_insn "*rotlsi3_1_one_bit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" + "rol{l}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, SImode, operands)" + "rol{l}\t%k0" + [(set_attr "type" "rotate") + (set_attr "length" "2")]) + +(define_insn "*rotlsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, SImode, operands)" + "@ + rol{l}\t{%2, %0|%0, %2} + rol{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_insn "*rotlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)" + "@ + rol{l}\t{%2, %k0|%k0, %2} + rol{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;") + +(define_insn "*rotlhi3_1_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, HImode, operands)" + "rol{w}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, HImode, operands)" + "@ + rol{w}\t{%2, %0|%0, %2} + rol{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "HI")]) + +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (rotate:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (strict_low_part (match_dup 0)) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_expand "rotlqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (rotate:QI (match_operand:QI 1 
"nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;") + +(define_insn "*rotlqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))" + "rol{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATE, QImode, operands)" + "rol{b}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotlqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotate:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + rol{b}\t{%1, %0|%0, %1} + rol{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") + (set_attr "mode" "QI")]) + +(define_insn "*rotlqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATE, QImode, operands)" + "@ + rol{b}\t{%2, %0|%0, %2} + rol{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "QI")]) + +(define_expand "rotrdi3" + [(set (match_operand:DI 0 "shiftdi_operand" "") + (rotate:DI (match_operand:DI 1 "shiftdi_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" +{ + if (TARGET_64BIT) + { + ix86_expand_binary_operator (ROTATERT, DImode, operands); + DONE; + } + if (!const_1_to_31_operand (operands[2], VOIDmode)) + FAIL; + emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2])); + DONE; +}) + +;; Implement rotation using two double-precision shift instructions +;; and a scratch register. 
+(define_insn_and_split "ix86_rotrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotatert:DI (match_operand:DI 1 "register_operand" "0") + (match_operand:QI 2 "const_1_to_31_operand" "I"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (match_scratch:SI 3 "=&r"))] + "!TARGET_64BIT" + "" + "&& reload_completed" + [(set (match_dup 3) (match_dup 4)) + (parallel + [(set (match_dup 4) + (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2)) + (ashift:SI (match_dup 5) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 5) + (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2)) + (ashift:SI (match_dup 3) + (minus:QI (const_int 32) (match_dup 2))))) + (clobber (reg:CC FLAGS_REG))])] + "split_di (&operands[0], 1, &operands[4], &operands[5]);") + +(define_insn "*rotrdi3_1_one_bit_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, DImode, operands)" + "ror{q}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:DI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrdi3_1_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "J,c"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)" + "@ + ror{q}\t{%2, %0|%0, %2} + ror{q}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "DI")]) + +(define_expand "rotrsi3" + [(set (match_operand:SI 0 "nonimmediate_operand" "") + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "" + "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;") + +(define_insn "*rotrsi3_1_one_bit" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "ror{l}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrsi3_1_one_bit_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (rotatert:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const1_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "ror{l}\t%k0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand:SI 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrsi3_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "@ + ror{l}\t{%2, %0|%0, %2} + ror{l}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + 
+(define_insn "*rotrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (rotatert:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)" + "@ + ror{l}\t{%2, %k0|%k0, %2} + ror{l}\t{%b2, %k0|%k0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "SI")]) + +(define_expand "rotrhi3" + [(set (match_operand:HI 0 "nonimmediate_operand" "") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_HIMODE_MATH" + "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;") + +(define_insn "*rotrhi3_one_bit" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, HImode, operands)" + "ror{w}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm") + (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, HImode, operands)" + "@ + ror{w}\t{%2, %0|%0, %2} + ror{w}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "HI")]) + +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (rotatert:HI (match_dup 0) (const_int 8))) + (clobber (reg:CC FLAGS_REG))] + "reload_completed" + [(parallel [(set (strict_low_part (match_dup 0)) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_expand "rotrqi3" + [(set (match_operand:QI 0 "nonimmediate_operand" "") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "nonmemory_operand" "")))] + "TARGET_QIMODE_MATH" + "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;") + +(define_insn "*rotrqi3_1_one_bit" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && ix86_binary_operator_ok (ROTATERT, QImode, operands)" + "ror{b}\t%0" + [(set_attr "type" "rotate") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1_one_bit_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "const1_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "(! 
TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))" + "ror{b}\t%0" + [(set_attr "type" "rotate1") + (set (attr "length") + (if_then_else (match_operand 0 "register_operand" "") + (const_string "2") + (const_string "*")))]) + +(define_insn "*rotrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm") + (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") + (match_operand:QI 2 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (ROTATERT, QImode, operands)" + "@ + ror{b}\t{%2, %0|%0, %2} + ror{b}\t{%b2, %0|%0, %b2}" + [(set_attr "type" "rotate") + (set_attr "mode" "QI")]) + +(define_insn "*rotrqi3_1_slp" + [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm")) + (rotatert:QI (match_dup 0) + (match_operand:QI 1 "nonmemory_operand" "I,c"))) + (clobber (reg:CC FLAGS_REG))] + "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + ror{b}\t{%1, %0|%0, %1} + ror{b}\t{%b1, %0|%0, %b1}" + [(set_attr "type" "rotate1") + (set_attr "mode" "QI")]) + +;; Bit set / bit test instructions + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] + "" +{ + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! ext_register_operand (operands[1], VOIDmode)) + FAIL; +}) + +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand 1 "ext_register_operand" "") + (match_operand:SI 2 "const8_operand" "") + (match_operand:SI 3 "const8_operand" "")))] + "" +{ + /* Handle extractions from %ah et al. */ + if (INTVAL (operands[2]) != 8 || INTVAL (operands[3]) != 8) + FAIL; + + /* From mips.md: extract_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! ext_register_operand (operands[1], VOIDmode)) + FAIL; +}) + +(define_expand "insv" + [(set (zero_extract (match_operand 0 "ext_register_operand" "") + (match_operand 1 "const8_operand" "") + (match_operand 2 "const8_operand" "")) + (match_operand 3 "register_operand" ""))] + "" +{ + /* Handle insertions to %ah et al. */ + if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8) + FAIL; + + /* From mips.md: insert_bit_field doesn't verify that our source + matches the predicate, so check it again here. */ + if (! ext_register_operand (operands[0], VOIDmode)) + FAIL; + + if (TARGET_64BIT) + emit_insn (gen_movdi_insv_1_rex64 (operands[0], operands[3])); + else + emit_insn (gen_movsi_insv_1 (operands[0], operands[3])); + + DONE; +}) + +;; %%% bts, btr, btc, bt. +;; In general these instructions are *slow* when applied to memory, +;; since they enforce atomic operation. When applied to registers, +;; it depends on the cpu implementation. They're never faster than +;; the corresponding and/ior/xor operations, so with 32-bit there's +;; no point. But in 64-bit, we can't hold the relevant immediates +;; within the instruction itself, so operating on bits in the high +;; 32-bits of a register becomes easier. +;; +;; These are slow on Nocona, but fast on Athlon64. 
We do require the use +;; of btrq and btcq for corner cases of post-reload expansion of absdf and +;; negdf respectively, so they can never be disabled entirely. + +(define_insn "*btsq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "bts{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +(define_insn "*btrq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btr{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +(define_insn "*btcq" + [(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || reload_completed)" + "btc{q}\t{%1, %0|%0, %1}" + [(set_attr "type" "alu1")]) + +;; Allow Nocona to avoid these instructions if a register is available. + +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 1)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; + + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); + + op1 = immed_double_const (lo, hi, DImode); + if (i >= 31) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } + + emit_insn (gen_iordi3 (operands[0], operands[0], op1)); + DONE; +}) + +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (const_int 0)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; + + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); + + op1 = immed_double_const (~lo, ~hi, DImode); + if (i >= 32) + { + emit_move_insn (operands[2], op1); + op1 = operands[2]; + } + + emit_insn (gen_anddi3 (operands[0], operands[0], op1)); + DONE; +}) + +(define_peephole2 + [(match_scratch:DI 2 "r") + (parallel [(set (zero_extract:DI + (match_operand:DI 0 "register_operand" "") + (const_int 1) + (match_operand:DI 1 "const_0_to_63_operand" "")) + (not:DI (zero_extract:DI + (match_dup 0) (const_int 1) (match_dup 1)))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && !TARGET_USE_BT" + [(const_int 0)] +{ + HOST_WIDE_INT i = INTVAL (operands[1]), hi, lo; + rtx op1; + + if (HOST_BITS_PER_WIDE_INT >= 64) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else if (i < HOST_BITS_PER_WIDE_INT) + lo = (HOST_WIDE_INT)1 << i, hi = 0; + else + lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT); + + op1 = immed_double_const (lo, hi, DImode); + if (i >= 31) + 
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
+
+  emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+  DONE;
+})
+
+(define_insn "*btdi_rex64"
+  [(set (reg:CCC FLAGS_REG)
+        (compare:CCC
+          (zero_extract:DI
+            (match_operand:DI 0 "register_operand" "r")
+            (const_int 1)
+            (match_operand:DI 1 "nonmemory_operand" "rN"))
+          (const_int 0)))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+  "bt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+(define_insn "*btsi"
+  [(set (reg:CCC FLAGS_REG)
+        (compare:CCC
+          (zero_extract:SI
+            (match_operand:SI 0 "register_operand" "r")
+            (const_int 1)
+            (match_operand:SI 1 "nonmemory_operand" "rN"))
+          (const_int 0)))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "bt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+
+;; Store-flag instructions.
+
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
+
+;; %%% Do the expansion to SImode.  If PII, do things the xor+setcc way
+;; to avoid partial register stalls.  Otherwise do things the setcc+movzx
+;; way, which can later delete the movzx if only QImode is needed.
+
+(define_expand "s<code>"
+  [(set (match_operand:QI 0 "register_operand" "")
+        (int_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+  ""
+  "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
+
+(define_expand "s<code>"
+  [(set (match_operand:QI 0 "register_operand" "")
+        (fp_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+  "TARGET_80387 || TARGET_SSE"
+  "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
+
+(define_insn "*setcc_1"
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+        (match_operator:QI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))]
+  ""
+  "set%C1\t%0"
+  [(set_attr "type" "setcc")
+   (set_attr "mode" "QI")])
+
+(define_insn "*setcc_2"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+        (match_operator:QI 1 "ix86_comparison_operator"
+          [(reg FLAGS_REG) (const_int 0)]))]
+  ""
+  "set%C1\t%0"
+  [(set_attr "type" "setcc")
+   (set_attr "mode" "QI")])
+
+;; In general it is not safe to assume too much about CCmode registers,
+;; so simplify-rtx stops when it sees a second one.  Under certain
+;; conditions this is safe on x86, so help combine not create
+;;
+;;	seta	%al
+;;	testb	%al, %al
+;;	sete	%al
+
+(define_split
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+        (ne:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+               (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+        (ne:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+               (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  PUT_MODE (operands[1], QImode);
+})
+
+(define_split
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+        (eq:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+               (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+                                             GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+(define_split
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
+        (eq:QI (match_operator 1 "ix86_comparison_operator"
+                 [(reg FLAGS_REG) (const_int 0)])
+               (const_int 0)))]
+  ""
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx new_op1 = copy_rtx (operands[1]);
+  operands[1] = new_op1;
+  PUT_MODE (new_op1, QImode);
+  PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
+                                             GET_MODE (XEXP (new_op1, 0))));
+
+  /* Make sure that (a) the CCmode we have for the flags is strong
+     enough for the reversed compare or (b) we have a valid FP compare.  */
+  if (! ix86_comparison_operator (new_op1, VOIDmode))
+    FAIL;
+})
+
+;; The SSE store flag instructions save 0 or 0xffffffff to the result;
+;; subsequent logical operations are used to imitate conditional moves.
+;; 0xffffffff is NaN, but not in normalized form, so we can't represent
+;; it directly.
+
+(define_insn "*avx_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+        (match_operator:MODEF 1 "avx_comparison_float_operator"
+          [(match_operand:MODEF 2 "register_operand" "x")
+           (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_AVX"
+  "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+        (match_operator:MODEF 1 "sse_comparison_operator"
+          [(match_operand:MODEF 2 "register_operand" "0")
+           (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+  "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sse5_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+        (match_operator:MODEF 1 "sse5_comparison_float_operator"
+          [(match_operand:MODEF 2 "register_operand" "x")
+           (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_SSE5"
+  "com%Y1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse4arg")
+   (set_attr "mode" "<MODE>")])
+
+
+;; Basic conditional jump instructions.
+;; We ignore the overflow flag for signed branch instructions.
+
+;; For all bCOND expanders, also expand the compare or test insn that
+;; generates reg FLAGS_REG.  Generate an equality comparison if `beq' or `bne'.
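+;;
+;; For example (sketch), for integer operands
+;;
+;;   if (a < b) goto out;
+;;
+;; first emits the compare that sets FLAGS_REG, then the "blt" expander
+;; below calls ix86_expand_branch, yielding roughly
+;;   cmpl %esi, %edi
+;;   jl out
+;; (registers chosen only for illustration).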
+ +(define_expand "b" + [(set (pc) + (if_then_else (int_cond:CC (reg:CC FLAGS_REG) + (const_int 0)) + (label_ref (match_operand 0 "")) + (pc)))] + "" + "ix86_expand_branch (, operands[0]); DONE;") + +(define_expand "b" + [(set (pc) + (if_then_else (fp_cond:CC (reg:CC FLAGS_REG) + (const_int 0)) + (label_ref (match_operand 0 "")) + (pc)))] + "TARGET_80387 || TARGET_SSE_MATH" + "ix86_expand_branch (, operands[0]); DONE;") + +(define_insn "*jcc_1" + [(set (pc) + (if_then_else (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (label_ref (match_operand 0 "" "")) + (pc)))] + "" + "%+j%C1\t%l0" + [(set_attr "type" "ibr") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 6)))]) + +(define_insn "*jcc_2" + [(set (pc) + (if_then_else (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (pc) + (label_ref (match_operand 0 "" ""))))] + "" + "%+j%c1\t%l0" + [(set_attr "type" "ibr") + (set_attr "modrm" "0") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 6)))]) + +;; In general it is not safe to assume too much about CCmode registers, +;; so simplify-rtx stops when it sees a second one. Under certain +;; conditions this is safe on x86, so help combine not create +;; +;; seta %al +;; testb %al, %al +;; je Lfoo + +(define_split + [(set (pc) + (if_then_else (ne (match_operator 0 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) + (if_then_else (match_dup 0) + (label_ref (match_dup 1)) + (pc)))] +{ + PUT_MODE (operands[0], VOIDmode); +}) + +(define_split + [(set (pc) + (if_then_else (eq (match_operator 0 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (const_int 0)) + (label_ref (match_operand 1 "" "")) + (pc)))] + "" + [(set (pc) + (if_then_else (match_dup 0) + (label_ref (match_dup 1)) + (pc)))] +{ + rtx new_op0 = copy_rtx (operands[0]); + operands[0] = new_op0; + PUT_MODE (new_op0, VOIDmode); + PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0), + GET_MODE (XEXP (new_op0, 0)))); + + /* Make sure that (a) the CCmode we have for the flags is strong + enough for the reversed compare or (b) we have a valid FP compare. */ + if (! ix86_comparison_operator (new_op0, VOIDmode)) + FAIL; +}) + +;; zero_extend in SImode is correct, since this is what combine pass +;; generates from shift insn with QImode operand. Actually, the mode of +;; operand 2 (bit offset operand) doesn't matter since bt insn takes +;; appropriate modulo of the bit offset value. 
+ +(define_insn_and_split "*jcc_btdi_rex64" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:DI + (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (zero_extend:SI + (match_operand:QI 2 "register_operand" "r"))) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:DI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btdi_mask_rex64" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:DI + (match_operand:DI 1 "register_operand" "r") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")))]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x3f) == 0x3f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:DI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (DImode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +(define_insn_and_split "*jcc_btsi" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SI + (match_operand:SI 1 "register_operand" "r") + (const_int 1) + (zero_extend:SI + (match_operand:QI 2 "register_operand" "r"))) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btsi_mask" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SI + (match_operand:SI 1 "register_operand" "r") + (const_int 1) + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")))]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x1f) == 0x1f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] + "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") + +(define_insn_and_split "*jcc_btsi_1" + 
[(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "register_operand" "r")) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + +;; avoid useless masking of bit offset operand +(define_insn_and_split "*jcc_btsi_mask_1" + [(set (pc) + (if_then_else + (match_operator 0 "bt_comparison_operator" + [(and:SI + (lshiftrt:SI + (match_operand:SI 1 "register_operand" "r") + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) 0)) + (const_int 1)) + (const_int 0)]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & 0x1f) == 0x1f" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SI + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] + "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") + +;; Define combination compare-and-branch fp compare instructions to use +;; during early optimization. Splitting the operation apart early makes +;; for bad code when we want to reverse the operation. 
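+;;
+;; E.g. (sketch) for
+;;
+;;   if (x < y) goto out;   /* x, y double */
+;;
+;; the whole compare-and-branch is kept as a single "#" insn through the
+;; early passes and only split after reload into a ucomisd/fucomip-style
+;; compare plus a conditional jump, so reversing the branch stays cheap
+;; and correct for NaN operands.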
+ +(define_insn "*fp_jcc_1_mixed" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_1_sse" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_1_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_2_mixed" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f,x") + (match_operand 2 "nonimmediate_operand" "f,xm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_MIX_SSE_I387 + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_2_sse" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "x") + (match_operand 2 "nonimmediate_operand" "xm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "TARGET_SSE_MATH + && SSE_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_2_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && TARGET_CMOVE + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_3_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + 
operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_4_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "nonimmediate_operand" "fm")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "TARGET_80387 + && (GET_MODE (operands[1]) == SFmode || GET_MODE (operands[1]) == DFmode) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_5_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_6_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "register_operand" "f")]) + (pc) + (label_ref (match_operand 3 "" "")))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +(define_insn "*fp_jcc_7_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "f") + (match_operand 2 "const0_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 4 "=a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[1])) + && GET_MODE (operands[1]) == GET_MODE (operands[2]) + && !ix86_use_fcomi_compare (GET_CODE (operands[0])) + && SELECT_CC_MODE (GET_CODE (operands[0]), + operands[1], operands[2]) == CCFPmode + && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))" + "#") + +;; The order of operands in *fp_jcc_8_387 is forced by combine in +;; simplify_comparison () function. Float operator is treated as RTX_OBJ +;; with a precedence over other operators and is always put in the first +;; place. Swap condition and operands to match ficom instruction. 
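+
+;; For example, combine rewrites the comparison in
+;;
+;;   int f (short i, double d) { return (double) i < d; }
+;;
+;; so that the (float ...) of the integer operand comes first.  The
+;; pattern below therefore matches that swapped form, and the
+;; corresponding splitters apply swap_condition so that the compare
+;; can be done with a single ficom from integer memory.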
+ +(define_insn "*fp_jcc_8_387" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")]) + (match_operand 3 "register_operand" "f,f")]) + (label_ref (match_operand 4 "" "")) + (pc))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a,a"))] + "X87_FLOAT_MODE_P (GET_MODE (operands[3])) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun)) + && GET_MODE (operands[1]) == GET_MODE (operands[3]) + && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0]))) + && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode + && ix86_fp_jump_nontrivial_p (swap_condition (GET_CODE (operands[0])))" + "#") + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "nonimmediate_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG))] + "reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], NULL_RTX, NULL_RTX); + DONE; +}) + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "general_operand" "")]) + (match_operand 3 "" "") + (match_operand 4 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 5 "=a"))] + "reload_completed" + [(const_int 0)] +{ + ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2], + operands[3], operands[4], operands[5], NULL_RTX); + DONE; +}) + +(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "memory_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]); + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], NULL_RTX); + DONE; +}) + +;; %%% Kill this when reload knows how to do it. 
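+;; (The split below covers the case where the integer operand of the
+;; float conversion is still held in a register after reload.  The
+;; ficom family only reads its integer operand from memory, so
+;; ix86_force_to_memory first spills the register to a stack slot --
+;; the job we would prefer reload itself to do.)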
+(define_split + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operator 1 "float_operator" + [(match_operand:X87MODEI12 2 "register_operand" "")]) + (match_operand 3 "register_operand" "")]) + (match_operand 4 "" "") + (match_operand 5 "" ""))) + (clobber (reg:CCFP FPSR_REG)) + (clobber (reg:CCFP FLAGS_REG)) + (clobber (match_scratch:HI 6 "=a"))] + "reload_completed" + [(const_int 0)] +{ + operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]); + ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), + operands[3], operands[7], + operands[4], operands[5], operands[6], operands[2]); + DONE; +}) + +;; Unconditional and other jump instructions + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" + "jmp\t%l0" + [(set_attr "type" "ibr") + (set (attr "length") + (if_then_else (and (ge (minus (match_dup 0) (pc)) + (const_int -126)) + (lt (minus (match_dup 0) (pc)) + (const_int 128))) + (const_int 2) + (const_int 5))) + (set_attr "modrm" "0")]) + +(define_expand "indirect_jump" + [(set (pc) (match_operand 0 "nonimmediate_operand" ""))] + "" + "") + +(define_insn "*indirect_jump" + [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))] + "" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_expand "tablejump" + [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "")) + (use (label_ref (match_operand 1 "" "")))])] + "" +{ + /* In PIC mode, the table entries are stored GOT (32-bit) or PC (64-bit) + relative. Convert the relative address to an absolute address. */ + if (flag_pic) + { + rtx op0, op1; + enum rtx_code code; + + /* We can't use @GOTOFF for text labels on VxWorks; + see gotoff_operand. */ + if (TARGET_64BIT || TARGET_VXWORKS_RTP) + { + code = PLUS; + op0 = operands[0]; + op1 = gen_rtx_LABEL_REF (Pmode, operands[1]); + } + else if (TARGET_MACHO || HAVE_AS_GOTOFF_IN_DATA) + { + code = PLUS; + op0 = operands[0]; + op1 = pic_offset_table_rtx; + } + else + { + code = MINUS; + op0 = pic_offset_table_rtx; + op1 = operands[0]; + } + + operands[0] = expand_simple_binop (Pmode, code, op0, op1, NULL_RTX, 0, + OPTAB_DIRECT); + } +}) + +(define_insn "*tablejump_1" + [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm")) + (use (label_ref (match_operand 1 "" "")))] + "" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +;; Convert setcc + movzbl to xor + setcc if operands don't overlap. + +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1)))] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Similar, but match zero_extendhisi2_and, which adds a clobber. 
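+;; Both forms perform the same rewrite; as an illustrative example,
+;;
+;;   cmpl   $1, 4(%esp)
+;;   sete   %al
+;;   movzbl %al, %eax
+;;
+;; becomes
+;;
+;;   xorl   %eax, %eax
+;;   cmpl   $1, 4(%esp)
+;;   sete   %al
+;;
+;; clearing the destination before the flags are set, so the separate
+;; zero extension (and its partial-register stall) goes away.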
+ +(define_peephole2 + [(set (reg FLAGS_REG) (match_operand 0 "" "")) + (set (match_operand:QI 1 "register_operand" "") + (match_operator:QI 2 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)])) + (parallel [(set (match_operand 3 "q_regs_operand" "") + (zero_extend (match_dup 1))) + (clobber (reg:CC FLAGS_REG))])] + "(peep2_reg_dead_p (3, operands[1]) + || operands_match_p (operands[1], operands[3])) + && ! reg_overlap_mentioned_p (operands[3], operands[0])" + [(set (match_dup 4) (match_dup 0)) + (set (strict_low_part (match_dup 5)) + (match_dup 2))] +{ + operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG); + operands[5] = gen_lowpart (QImode, operands[3]); + ix86_expand_clear (operands[3]); +}) + +;; Call instructions. + +;; The predicates normally associated with named expanders are not properly +;; checked for calls. This is a bug in the generic code, but it isn't that +;; easy to fix. Ignore it for now and be prepared to fix things up. + +;; P6 processors will jump to the address after the decrement when %esp +;; is used as a call operand, so they will execute return address as a code. +;; See Pentium Pro errata 70, Pentium 2 errata A33 and Pentium 3 errata E17. + +;; Call subroutine returning no value. + +(define_expand "call_pop" + [(parallel [(call (match_operand:QI 0 "" "") + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "" "")))])] + "!TARGET_64BIT" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], operands[3], 0); + DONE; +}) + +(define_insn "*call_pop_0" + [(call (mem:QI (match_operand:SI 0 "constant_call_address_operand" "")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_pop_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*sibcall_pop_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand:SI 1 "" "")) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 2 "immediate_operand" "i,i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) + +(define_expand "call" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 0); + DONE; +}) + +(define_expand "sibcall" + [(call (match_operand:QI 0 "" "") + (match_operand 1 "" "")) + (use (match_operand 2 "" ""))] + "" +{ + ix86_expand_call (NULL, operands[0], operands[1], operands[2], NULL, 1); + DONE; +}) + +(define_insn "*call_0" + [(call (mem:QI (match_operand 0 "constant_call_address_operand" "")) + (match_operand 1 "" ""))] + "" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P0"; + else + return "call\t%P0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1" + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + (match_operand 1 "" ""))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[0], 
Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1" + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" ""))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" ""))] + "TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64_ms_sysv" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + (match_operand 1 "" "")) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[0], Pmode)) + return "call\t%P0"; + return "call\t%A0"; +} + [(set_attr "type" "call")]) + +(define_insn "*call_1_rex64_large" + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm")) + (match_operand 1 "" ""))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "call\t%A0" + [(set_attr "type" "call")]) + +(define_insn "*sibcall_1_rex64" + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) + (match_operand 1 "" ""))] + "TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P0 + jmp\t%A0" + [(set_attr "type" "call")]) + +;; Call subroutine, returning value in operand 0 +(define_expand "call_value_pop" + [(parallel [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 4 "" "")))])] + "!TARGET_64BIT" +{ + ix86_expand_call (operands[0], operands[1], operands[2], + operands[3], operands[4], 0); + DONE; +}) + +(define_expand "call_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 0); + DONE; +}) + +(define_expand "sibcall_value" + [(set (match_operand 0 "" "") + (call (match_operand:QI 1 "" "") + (match_operand:SI 2 "" ""))) + (use (match_operand:SI 3 "" ""))] + ;; Operand 2 not used on the i386. + "" +{ + ix86_expand_call (operands[0], operands[1], operands[2], operands[3], NULL, 1); + DONE; +}) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "" +{ + int i; + + /* In order to give reg-stack an easier job in validating two + coprocessor registers as containing a possible return value, + simply pretend the untyped call returns a complex long double + value. + + We can't use SSE_REGPARM_MAX here since callee is unprototyped + and should have the default ABI. */ + + ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387 + ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), + operands[0], const0_rtx, + GEN_INT ((TARGET_64BIT + ? 
(DEFAULT_ABI == SYSV_ABI + ? X86_64_SSE_REGPARM_MAX + : X64_SSE_REGPARM_MAX) + : X86_32_SSE_REGPARM_MAX) + - 1), + NULL, 0); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; Prologue and epilogue instructions + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Do not schedule instructions accessing memory across this point. + +(define_expand "memory_blockage" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*memory_blockage" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BLOCKAGE))] + "" + "" + [(set_attr "length" "0")]) + +;; As USE insns aren't meaningful after reload, this is used instead +;; to prevent deleting instructions setting registers for PIC code +(define_insn "prologue_use" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)] + "" + "" + [(set_attr "length" "0")]) + +;; Insn emitted into the body of a function to return from a function. +;; This is only done if the function's epilogue is known to be simple. +;; See comments for ix86_can_use_return_insn_p in i386.c. + +(define_expand "return" + [(return)] + "ix86_can_use_return_insn_p ()" +{ + if (crtl->args.pops_args) + { + rtx popc = GEN_INT (crtl->args.pops_args); + emit_jump_insn (gen_return_pop_internal (popc)); + DONE; + } +}) + +(define_insn "return_internal" + [(return)] + "reload_completed" + "ret" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET +;; instruction Athlon and K8 have. + +(define_insn "return_internal_long" + [(return) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep\;ret" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + +(define_insn "return_pop_internal" + [(return) + (use (match_operand:SI 0 "const_int_operand" ""))] + "reload_completed" + "ret\t%0" + [(set_attr "length" "3") + (set_attr "length_immediate" "2") + (set_attr "modrm" "0")]) + +(define_insn "return_indirect_internal" + [(return) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" + "jmp\t%A0" + [(set_attr "type" "ibr") + (set_attr "length_immediate" "0")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +;; Align to 16-byte boundary, max skip in op0. Used to avoid +;; branch prediction penalty for the third jump in a 16-byte +;; block on K8. + +(define_insn "align" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)] + "" +{ +#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN + ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, 4, (int)INTVAL (operands[0])); +#else + /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that. 
+ The align insn is used to avoid 3 jump instructions in the row to improve + branch prediction and the benefits hardly outweigh the cost of extra 8 + nops on the average inserted by full alignment pseudo operation. */ +#endif + return ""; +} + [(set_attr "length" "16")]) + +(define_expand "prologue" + [(const_int 0)] + "" + "ix86_expand_prologue (); DONE;") + +(define_insn "set_got" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + { return output_set_got (operands[0], NULL_RTX); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_labelled" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + { return output_set_got (operands[0], operands[1]); } + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "set_got_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))] + "TARGET_64BIT" + "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + +(define_insn "set_rip_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(label_ref (match_operand 1 "" ""))] UNSPEC_SET_RIP))] + "TARGET_64BIT" + "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}" + [(set_attr "type" "lea") + (set_attr "length" "6")]) + +(define_insn "set_got_offset_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(label_ref (match_operand 1 "" ""))] + UNSPEC_SET_GOT_OFFSET))] + "TARGET_64BIT" + "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}" + [(set_attr "type" "imov") + (set_attr "length" "11")]) + +(define_expand "epilogue" + [(const_int 0)] + "" + "ix86_expand_epilogue (1); DONE;") + +(define_expand "sibcall_epilogue" + [(const_int 0)] + "" + "ix86_expand_epilogue (0); DONE;") + +(define_expand "eh_return" + [(use (match_operand 0 "register_operand" ""))] + "" +{ + rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0]; + + /* Tricky bit: we write the address of the handler to which we will + be returning into someone else's stack frame, one word below the + stack address we wish to restore. 
*/ + tmp = gen_rtx_PLUS (Pmode, arg_pointer_rtx, sa); + tmp = plus_constant (tmp, -UNITS_PER_WORD); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (tmp, ra); + + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 0)] + "ix86_expand_epilogue (2); DONE;") + +(define_insn "leave" + [(set (reg:SI SP_REG) (plus:SI (reg:SI BP_REG) (const_int 4))) + (set (reg:SI BP_REG) (mem:SI (reg:SI BP_REG))) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +(define_insn "leave_rex64" + [(set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8))) + (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG))) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" + "leave" + [(set_attr "type" "leave")]) + +(define_expand "ffssi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" ""))) + (clobber (match_scratch:SI 2 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_CMOVE) + { + emit_insn (gen_ffs_cmove (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "ffs_cmove" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "") + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "") + (ctz:SI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:SI + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_CMOVE" + "operands[2] = gen_reg_rtx (SImode);") + +(define_insn_and_split "*ffs_no_cmove" + [(set (match_operand:SI 0 "register_operand" "=r") + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (match_scratch:SI 2 "=&q")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_CMOVE" + "#" + "&& reload_completed" + [(parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_dup 1) (const_int 0))) + (set (match_dup 0) (ctz:SI (match_dup 1)))]) + (set (strict_low_part (match_dup 3)) + (eq:QI (reg:CCZ FLAGS_REG) (const_int 0))) + (parallel [(set (match_dup 2) (neg:SI (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[3] = gen_lowpart (QImode, operands[2]); + ix86_expand_clear (operands[2]); +}) + +(define_insn "*ffssi_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_dup 1)))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_expand "ffsdi2" + [(set (match_dup 2) (const_int -1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "") + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "") + (ctz:DI (match_dup 1)))]) + (set (match_dup 0) (if_then_else:DI + (eq (reg:CCZ FLAGS_REG) (const_int 0)) + (match_dup 2) + (match_dup 0))) + (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" + "operands[2] = gen_reg_rtx (DImode);") + +(define_insn "*ffsdi_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm") + (const_int 
0))) + (set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_dup 1)))] + "TARGET_64BIT" + "bsf{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_insn "ctzsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsf{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsf{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1")]) + +(define_expand "clzsi2" + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzsi2_abm (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "clzsi2_abm" + [(set (match_operand:SI 0 "register_operand" "=r") + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +(define_insn "*bsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (const_int 31) + (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{l}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "SI")]) + +(define_insn "popcount2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*popcount2_cmp" + [(set (reg FLAGS_REG) + (compare + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 (match_dup 1)))] + "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "")]) + +(define_insn "*popcountsi2_cmp_zext" + [(set (reg FLAGS_REG) + (compare + (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) + (const_int 0))) + (set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI(popcount:SI (match_dup 1))))] + "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %0|%0, %1}"; +#else + return "popcnt{}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "") + (bswap:SI (match_operand:SI 1 "register_operand" "")))] + "" +{ + if (!TARGET_BSWAP) + { + rtx x = operands[0]; + + emit_move_insn (x, operands[1]); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + emit_insn (gen_rotlsi3 (x, x, GEN_INT (16))); + emit_insn (gen_bswaphi_lowpart (gen_lowpart (HImode, x))); + DONE; + } +}) + +(define_insn 
"*bswapsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "0")))] + "TARGET_BSWAP" + "bswap\t%0" + [(set_attr "prefix_0f" "1") + (set_attr "length" "2")]) + +(define_insn "*bswaphi_lowpart_1" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)" + "@ + xchg{b}\t{%h0, %b0|%b0, %h0} + rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "2,4") + (set_attr "mode" "QI,HI")]) + +(define_insn "bswaphi_lowpart" + [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r")) + (bswap:HI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "rol{w}\t{$8, %0|%0, 8}" + [(set_attr "length" "4") + (set_attr "mode" "HI")]) + +(define_insn "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (bswap:DI (match_operand:DI 1 "register_operand" "0")))] + "TARGET_64BIT" + "bswap\t%0" + [(set_attr "prefix_0f" "1") + (set_attr "length" "3")]) + +(define_expand "clzdi2" + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzdi2_abm (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "clzdi2_abm" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_ABM" + "lzcnt{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "DI")]) + +(define_insn "*bsr_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (const_int 63) + (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "bsr{q}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "DI")]) + +(define_expand "clzhi2" + [(parallel + [(set (match_operand:HI 0 "register_operand" "") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15))) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (TARGET_ABM) + { + emit_insn (gen_clzhi2_abm (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "clzhi2_abm" + [(set (match_operand:HI 0 "register_operand" "=r") + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_ABM" + "lzcnt{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "HI")]) + +(define_insn "*bsrhi" + [(set (match_operand:HI 0 "register_operand" "=r") + (minus:HI (const_int 15) + (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))) + (clobber (reg:CC FLAGS_REG))] + "" + "bsr{w}\t{%1, %0|%0, %1}" + [(set_attr "prefix_0f" "1") + (set_attr "mode" "HI")]) + +(define_expand "paritydi2" + [(set (match_operand:DI 0 "register_operand" "") + (parity:DI (match_operand:DI 1 "register_operand" "")))] + "! 
TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritydi2_cmp (NULL_RTX, NULL_RTX, + NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + if (TARGET_64BIT) + emit_insn (gen_zero_extendqidi2 (operands[0], scratch)); + else + { + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (tmp, scratch)); + emit_insn (gen_zero_extendsidi2 (operands[0], tmp)); + } + DONE; +}) + +(define_insn_and_split "paritydi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:DI 3 "register_operand" "0"))) + (clobber (match_scratch:DI 0 "=r")) + (clobber (match_scratch:SI 1 "=&r")) + (clobber (match_scratch:HI 2 "=Q"))] + "! TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:SI (match_dup 1) (match_dup 4))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (parity:CC (match_dup 1))) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] +{ + operands[4] = gen_lowpart (SImode, operands[3]); + + if (TARGET_64BIT) + { + emit_move_insn (operands[1], gen_lowpart (SImode, operands[3])); + emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32))); + } + else + operands[1] = gen_highpart (SImode, operands[3]); +}) + +(define_expand "paritysi2" + [(set (match_operand:SI 0 "register_operand" "") + (parity:SI (match_operand:SI 1 "register_operand" "")))] + "! TARGET_POPCNT" +{ + rtx scratch = gen_reg_rtx (QImode); + rtx cond; + + emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1])); + + cond = gen_rtx_fmt_ee (ORDERED, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, scratch, cond)); + + emit_insn (gen_zero_extendqisi2 (operands[0], scratch)); + DONE; +}) + +(define_insn_and_split "paritysi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:SI 2 "register_operand" "0"))) + (clobber (match_scratch:SI 0 "=r")) + (clobber (match_scratch:HI 1 "=&Q"))] + "! TARGET_POPCNT" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 1) + (xor:HI (match_dup 1) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))]) + (parallel + [(set (reg:CC FLAGS_REG) + (parity:CC (match_dup 1))) + (clobber (match_dup 1))])] +{ + operands[3] = gen_lowpart (HImode, operands[2]); + + emit_move_insn (operands[1], gen_lowpart (HImode, operands[2])); + emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16))); +}) + +(define_insn "*parityhi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:HI 1 "register_operand" "0"))) + (clobber (match_scratch:HI 0 "=Q"))] + "! TARGET_POPCNT" + "xor{b}\t{%h0, %b0|%b0, %h0}" + [(set_attr "length" "2") + (set_attr "mode" "HI")]) + +(define_insn "*parityqi2_cmp" + [(set (reg:CC FLAGS_REG) + (parity:CC (match_operand:QI 0 "register_operand" "q")))] + "! TARGET_POPCNT" + "test{b}\t%0, %0" + [(set_attr "length" "2") + (set_attr "mode" "QI")]) + +;; Thread-local storage patterns for ELF. +;; +;; Note that these code sequences must appear exactly as shown +;; in order to allow linker relaxation. 
+ +(define_insn "*tls_global_dynamic_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%a2@TLSGD(,%1,1), %0|%0, %a2@TLSGD[%1*1]}\;call\t%P3" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_insn "*tls_global_dynamic_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "tls_symbolic_operand" "") + (match_operand:SI 3 "call_insn_operand" "")] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%a2@DTLNDX(%1), %4|%4, %a2@DTLNDX[%1]} + push{l}\t%4\;call\t%a2@TLSPLT\;pop{l}\t%4\;nop" + [(set_attr "type" "multi") + (set_attr "length" "14")]) + +(define_expand "tls_global_dynamic_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI + [(match_dup 2) + (match_operand:SI 1 "tls_symbolic_operand" "") + (match_dup 3)] + UNSPEC_TLS_GD)) + (clobber (match_scratch:SI 4 "")) + (clobber (match_scratch:SI 5 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[2] = pic_offset_table_rtx; + else + { + operands[2] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[2])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], operands[1], operands[2])); + DONE; + } + operands[3] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_global_dynamic_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" "")) + (match_operand:DI 3 "" ""))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)] + "TARGET_64BIT" + { return ".byte\t0x66\n\tlea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\n" ASM_SHORT "0x6666\n\trex64\n\tcall\t%P2"; } + [(set_attr "type" "multi") + (set_attr "length" "16")]) + +(define_expand "tls_global_dynamic_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 2)) (const_int 0))) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLS_GD)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_32_gnu" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU_TLS" + "lea{l}\t{%&@TLSLDM(%1), %0|%0, %&@TLSLDM[%1]}\;call\t%P2" + [(set_attr "type" "multi") + (set_attr "length" "11")]) + +(define_insn "*tls_local_dynamic_base_32_sun" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "=d")) + (clobber (match_scratch:SI 4 "=c")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_SUN_TLS" + "lea{l}\t{%&@TMDNX(%1), %3|%3, %&@TMDNX[%1]} + 
push{l}\t%3\;call\t%&@TLSPLT\;pop{l}\t%3" + [(set_attr "type" "multi") + (set_attr "length" "13")]) + +(define_expand "tls_local_dynamic_base_32" + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 2)] + UNSPEC_TLS_LD_BASE)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 "")) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (flag_pic) + operands[1] = pic_offset_table_rtx; + else + { + operands[1] = gen_reg_rtx (Pmode); + emit_insn (gen_set_got (operands[1])); + } + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_32 + (operands[0], ix86_tls_module_base (), operands[1])); + DONE; + } + operands[2] = ix86_tls_get_addr (); +}) + +(define_insn "*tls_local_dynamic_base_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" "")) + (match_operand:DI 2 "" ""))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)] + "TARGET_64BIT" + "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1" + [(set_attr "type" "multi") + (set_attr "length" "12")]) + +(define_expand "tls_local_dynamic_base_64" + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (call:DI (mem:QI (match_dup 1)) (const_int 0))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] + "" +{ + if (TARGET_GNU2_TLS) + { + emit_insn (gen_tls_dynamic_gnu2_64 + (operands[0], ix86_tls_module_base ())); + DONE; + } + operands[1] = ix86_tls_get_addr (); +}) + +;; Local dynamic of a single variable is a lose. Show combine how +;; to convert that back to global dynamic. + +(define_insn_and_split "*tls_local_dynamic_32_once" + [(set (match_operand:SI 0 "register_operand" "=a") + (plus:SI (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "call_insn_operand" "")] + UNSPEC_TLS_LD_BASE) + (const:SI (unspec:SI + [(match_operand:SI 3 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) + (clobber (reg:CC FLAGS_REG))] + "" + "#" + "" + [(parallel [(set (match_dup 0) + (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] + UNSPEC_TLS_GD)) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; Load and add the thread base pointer from %gs:0. 
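+;; For instance (illustrative), an initial-exec access
+;;
+;;   extern __thread int t;
+;;   int f (void) { return t; }
+;;
+;; can be compiled on ia32 as
+;;
+;;   movl %gs:0, %eax
+;;   movl t@ntpoff(%eax), %eax
+;;
+;; where the first instruction is *load_tp_si below; 64-bit code uses
+;; %fs:0 and the _di variants instead.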
+ +(define_insn "*load_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(const_int 0)] UNSPEC_TP))] + "!TARGET_64BIT" + "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) + (match_operand:SI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*load_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "TARGET_64BIT" + "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" + [(set_attr "type" "imov") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +(define_insn "*add_tp_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (unspec:DI [(const_int 0)] UNSPEC_TP) + (match_operand:DI 1 "register_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}" + [(set_attr "type" "alu") + (set_attr "modrm" "0") + (set_attr "length" "7") + (set_attr "memory" "load") + (set_attr "imm_disp" "false")]) + +;; GNU2 TLS patterns can be split. + +(define_expand "tls_dynamic_gnu2_32" + [(set (match_dup 3) + (plus:SI (match_operand:SI 2 "register_operand" "") + (const:SI + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)))) + (parallel + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_dup 1) (match_dup 3) + (match_dup 2) (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "!TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[3] = !can_create_pseudo_p () ? 
operands[0] : gen_reg_rtx (Pmode); + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) + +(define_insn "*tls_dynamic_lea_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "b") + (const:SI + (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC))))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}" + [(set_attr "type" "lea") + (set_attr "mode" "SI") + (set_attr "length" "6") + (set_attr "length_address" "4")]) + +(define_insn "*tls_dynamic_call_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "") + (match_operand:SI 2 "register_operand" "0") + ;; we have to make sure %ebx still points to the GOT + (match_operand:SI 3 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + [(set_attr "type" "call") + (set_attr "length" "2") + (set_attr "length_address" "0")]) + +(define_insn_and_split "*tls_dynamic_gnu2_combine_32" + [(set (match_operand:SI 0 "register_operand" "=&a") + (plus:SI + (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "") + (match_operand:SI 4 "" "") + (match_operand:SI 2 "register_operand" "b") + (reg:SI SP_REG)] + UNSPEC_TLSDESC) + (const:SI (unspec:SI + [(match_operand:SI 1 "tls_symbolic_operand" "")] + UNSPEC_DTPOFF)))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_GNU2_TLS" + "#" + "" + [(set (match_dup 0) (match_dup 5))] +{ + operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2])); +}) + +(define_expand "tls_dynamic_gnu2_64" + [(set (match_dup 2) + (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")] + UNSPEC_TLSDESC)) + (parallel + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT && TARGET_GNU2_TLS" +{ + operands[2] = !can_create_pseudo_p () ? 
operands[0] : gen_reg_rtx (Pmode);
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		   UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")
+   (set_attr "length_address" "4")])
+
+(define_insn "*tls_dynamic_call_64"
+  [(set (match_operand:DI 0 "register_operand" "=a")
+	(unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")
+		    (match_operand:DI 2 "register_operand" "0")
+		    (reg:DI SP_REG)]
+		   UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64"
+  [(set (match_operand:DI 0 "register_operand" "=&a")
+	(plus:DI
+	 (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")
+		     (match_operand:DI 3 "" "")
+		     (reg:DI SP_REG)]
+		    UNSPEC_TLSDESC)
+	 (const:DI (unspec:DI
+		    [(match_operand:DI 1 "tls_symbolic_operand" "")]
+		    UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"
+  ""
+  [(set (match_dup 0) (match_dup 4))]
+{
+  operands[4] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;;
+
+;; These patterns match the binary 387 instructions for addM3, subM3,
+;; mulM3 and divM3.  There are three patterns for each of DFmode and
+;; SFmode.  The first is the normal insn, the second the same insn but
+;; with one operand a conversion, and the third the same insn but with
+;; the other operand a conversion.  The conversion may be SFmode or
+;; SImode if the target mode is DFmode, but only SImode if the target
+;; mode is SFmode.
+
+;; GCC is slightly smarter about handling normal two-address
+;; instructions, so use special patterns for add and mul.
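+
+;; For example, with the i387 doing the arithmetic,
+;;
+;;   double f (double a, double b) { return a + b; }
+;;
+;; matches one of the commutative patterns that follow and emits a
+;; single fadd, while compiling the same function with -mfpmath=sse
+;; selects an SSE alternative and emits addsd instead.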
+ +(define_insn "*fop__comm_mixed_avx" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "prefix" "orig,maybe_vex") + (set_attr "mode" "")]) + +(define_insn "*fop__comm_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (eq_attr "alternative" "1") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd")) + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop")))) + (set_attr "mode" "")]) + +(define_insn "*fop__comm_avx" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*fop__comm_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (const_string "sseadd"))) + (set_attr "mode" "")]) + +(define_insn "*fop__comm_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop__1_mixed_avx" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" 
"0,fm,x") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "prefix" "orig,orig,maybe_vex") + (set_attr "mode" "")]) + +(define_insn "*fop__1_mixed" + [(set (match_operand:MODEF 0 "register_operand" "=f,f,x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_MIX_SSE_I387 + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "mult_operator" "")) + (const_string "ssemul") + (and (eq_attr "alternative" "2") + (match_operand:MODEF 3 "div_operator" "")) + (const_string "ssediv") + (eq_attr "alternative" "2") + (const_string "sseadd") + (match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*rcpsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RCP))] + "TARGET_SSE_MATH" + "%vrcpss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_insn "*fop__1_avx" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*fop__1_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "ssemul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "ssediv") + ] + (const_string "sseadd"))) + (set_attr "mode" "")]) + +;; This pattern is not fully shadowed by the pattern above. 
+(define_insn "*fop__1_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm") + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && !COMMUTATIVE_ARITH_P (operands[3]) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +;; ??? Add SSE splitters for these! +(define_insn "*fop__2_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(float:MODEF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:MODEF 2 "register_operand" "0,0")]))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop__3_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f,f") + (match_operator:MODEF 3 "binary_fp_operator" + [(match_operand:MODEF 1 "register_operand" "0,0") + (float:MODEF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:MODEF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:MODEF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_df_4_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "nonimmediate_operand" "fm,0")) + (match_operand:DF 2 "register_operand" "0,f")]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH) + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_5_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(match_operand:DF 1 "register_operand" "0,f") + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_df_6_i387" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 3 "binary_fp_operator" + [(float_extend:DF + (match_operand:SF 1 "register_operand" "0,f")) + (float_extend:DF + (match_operand:SF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:DF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:DF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "SF")]) + +(define_insn "*fop_xf_comm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "%0") + (match_operand:XF 2 "register_operand" "f")]))] + "TARGET_80387 + && COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (if_then_else (match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_1_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (match_operand:XF 2 "register_operand" "f,0")]))] + "TARGET_80387 + && !COMMUTATIVE_ARITH_P (operands[3])" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "XF")]) + +(define_insn "*fop_xf_2_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float:XF + (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r")) + (match_operand:XF 2 "register_operand" "0,0")]))] + "TARGET_80387 && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? 
\"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_xf_3_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,0") + (float:XF + (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))] + "TARGET_80387 && (TARGET_USE_MODE_FIOP || optimize_function_for_size_p (cfun))" + "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "fp_int_src" "true") + (set_attr "mode" "")]) + +(define_insn "*fop_xf_4_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand:MODEF 1 "nonimmediate_operand" "fm,0")) + (match_operand:XF 2 "register_operand" "0,f")]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop_xf_5_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(match_operand:XF 1 "register_operand" "0,f") + (float_extend:XF + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_insn "*fop_xf_6_i387" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (match_operator:XF 3 "binary_fp_operator" + [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0,f")) + (float_extend:XF + (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))] + "TARGET_80387" + "* return output_387_binary_op (insn, operands);" + [(set (attr "type") + (cond [(match_operand:XF 3 "mult_operator" "") + (const_string "fmul") + (match_operand:XF 3 "div_operator" "") + (const_string "fdiv") + ] + (const_string "fop"))) + (set_attr "mode" "")]) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(float (match_operand:X87MODEI12 1 "register_operand" "")) + (match_operand 2 "register_operand" "")]))] + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE (operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[4], + operands[2]))); + ix86_free_from_memory (GET_MODE (operands[1])); + DONE; +}) + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "binary_fp_operator" + [(match_operand 1 "register_operand" "") + (float (match_operand:X87MODEI12 2 "register_operand" ""))]))] + "reload_completed + && X87_FLOAT_MODE_P (GET_MODE 
(operands[0]))" + [(const_int 0)] +{ + operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]); + operands[4] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[4]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], + gen_rtx_fmt_ee (GET_CODE (operands[3]), + GET_MODE (operands[3]), + operands[1], + operands[4]))); + ix86_free_from_memory (GET_MODE (operands[2])); + DONE; +}) + +;; FPU special functions. + +;; This pattern implements a no-op XFmode truncation for +;; all fancy i386 XFmode math functions. + +(define_insn "truncxf2_i387_noop_unspec" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")] + UNSPEC_TRUNC_NOOP))] + "TARGET_USE_FANCY_MATH_387" + "* return output_387_reg_move (insn, operands);" + [(set_attr "type" "fmov") + (set_attr "mode" "")]) + +(define_insn "sqrtxf2" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF (match_operand:XF 1 "register_operand" "0")))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "sqrt_extendxf2_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (sqrt:XF + (float_extend:XF + (match_operand:MODEF 1 "register_operand" "0"))))] + "TARGET_USE_FANCY_MATH_387" + "fsqrt" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF") + (set_attr "athlon_decode" "direct") + (set_attr "amdfam10_decode" "direct")]) + +(define_insn "*rsqrtsf2_sse" + [(set (match_operand:SF 0 "register_operand" "=x") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" + "%vrsqrtss\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "SF")]) + +(define_expand "rsqrtsf2" + [(set (match_operand:SF 0 "register_operand" "") + (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")] + UNSPEC_RSQRT))] + "TARGET_SSE_MATH" +{ + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1); + DONE; +}) + +(define_insn "*sqrt2_sse" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "%vsqrts\t{%1, %d0|%d0, %1}" + [(set_attr "type" "sse") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "") + (set_attr "athlon_decode" "*") + (set_attr "amdfam10_decode" "*")]) + +(define_expand "sqrt2" + [(set (match_operand:MODEF 0 "register_operand" "") + (sqrt:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "")))] + "TARGET_USE_FANCY_MATH_387 + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + if (mode == SFmode + && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun) + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0); + DONE; + } + + if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) + { + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = force_reg (mode, operands[1]); + + emit_insn (gen_sqrt_extendxf2_i387 (op0, op1)); + emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0)); + DONE; + } +}) + +(define_insn "fpremxf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM_U)) + (set (reg:CCFP 
FPSR_REG) + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] + "TARGET_USE_FANCY_MATH_387" + "fprem" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "fmodxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op2, operands[2]); + emit_move_insn (op1, operands[1]); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_expand "fmod3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_extendxf2 (op1, operands[1])); + + emit_label (label); + emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + /* Truncate the result properly for strict SSE math. */ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !TARGET_MIX_SSE_I387) + emit_insn (gen_truncxf2 (operands[0], op1)); + else + emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op1)); + + DONE; +}) + +(define_insn "fprem1xf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0") + (match_operand:XF 3 "register_operand" "1")] + UNSPEC_FPREM1_F)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2) (match_dup 3)] + UNSPEC_FPREM1_U)) + (set (reg:CCFP FPSR_REG) + (unspec:CCFP [(match_dup 2) (match_dup 3)] + UNSPEC_C2_FLAG))] + "TARGET_USE_FANCY_MATH_387" + "fprem1" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "remainderxf3" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "general_operand" "")) + (use (match_operand:XF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_move_insn (op2, operands[2]); + emit_move_insn (op1, operands[1]); + + emit_label (label); + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_expand "remainder3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "general_operand" "")) + (use (match_operand:MODEF 2 "general_operand" ""))] + "TARGET_USE_FANCY_MATH_387" +{ + rtx label = gen_label_rtx (); + + rtx op1 = gen_reg_rtx (XFmode); + rtx op2 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op2, operands[2])); + emit_insn (gen_extendxf2 (op1, operands[1])); + + emit_label (label); + + emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2)); + ix86_emit_fp_unordered_jump (label); + LABEL_NUSES (label) = 1; + + /* Truncate the result properly for strict SSE math. 
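;; Illustrative sketch (not text from the file): fprem/fprem1 only
;; compute a partial remainder, signalling "incomplete" through the C2
;; status flag, so the fmod/remainder expanders above emit a loop that
;; re-runs the instruction until C2 clears.  Roughly, the generated
;; loop looks like (sahf maps C2 onto PF, hence the jp):
;;
;;   1:	fprem			# st(0) = partial remainder
;;	fnstsw	%ax		# copy FPU status word to AX
;;	sahf			# C0->CF, C2->PF, C3->ZF
;;	jp	1b		# C2 set: reduction incomplete, iterate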
+(define_insn "*sinxf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*sin_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))]
+		   UNSPEC_SIN))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fsin"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cosxf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cos_extend<mode>xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))]
+		   UNSPEC_COS))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fcos"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+;; When the sincos pattern is defined, the sin and cos builtins are
+;; expanded to a sincos pattern with one of its outputs left unused.
+;; The CSE pass will figure out whether two sincos patterns can be
+;; combined; otherwise a sincos pattern is split back into a sin or
+;; cos pattern, depending on which output is unused.
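As a minimal illustration (not part of the patch itself), a C unit like the following, compiled with -ffast-math for x87 math, exercises this: both builtins expand to the sincos pattern over the same input, and CSE merges them so a single fsincos computes both results.

/* Hypothetical example: both expansions below share one fsincos.  */
void sincos_pair (double x, double *s, double *c)
{
  *s = __builtin_sin (x);
  *c = __builtin_cos (x);
}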
+ +(define_insn "sincosxf3" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "")] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))] + "") + +(define_insn "sincos_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fsincos" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))] + "") + +(define_split + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" ""))] + UNSPEC_SINCOS_COS)) + (set (match_operand:XF 1 "register_operand" "") + (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))] + "find_regno_note (insn, REG_UNUSED, REGNO (operands[1])) + && !(reload_completed || reload_in_progress)" + [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))] + "") + +(define_expand "sincos3" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_sincos_extendxf3_i387 (op0, op1, operands[2])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[1], op1)); + DONE; +}) + +(define_insn "fptanxf4_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (match_operand:XF 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF 
[(match_operand:XF 2 "register_operand" "0")] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fptan_extendxf4_i387" + [(set (match_operand:MODEF 0 "register_operand" "=f") + (match_operand:MODEF 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 2 "register_operand" "0"))] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "tanxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + rtx one = gen_reg_rtx (XFmode); + rtx op2 = CONST1_RTX (XFmode); /* fld1 */ + + emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2)); + DONE; +}) + +(define_expand "tan2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx one = gen_reg_rtx (mode); + rtx op2 = CONST1_RTX (mode); /* fld1 */ + + emit_insn (gen_fptan_extendxf4_i387 (one, op0, + operands[1], op2)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "*fpatanxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0") + (match_operand:XF 2 "register_operand" "u")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_insn "fpatan_extendxf3_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(float_extend:XF + (match_operand:MODEF 1 "register_operand" "0")) + (float_extend:XF + (match_operand:MODEF 2 "register_operand" "u"))] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 "=2"))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" + "fpatan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) + +(define_expand "atan2xf3" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 2 "register_operand" "") + (match_operand:XF 1 "register_operand" "")] + UNSPEC_FPATAN)) + (clobber (match_scratch:XF 3 ""))])] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "") + +(define_expand "atan23" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" "")) + (use (match_operand:MODEF 2 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + + emit_insn (gen_fpatan_extendxf3_i387 (op0, operands[2], operands[1])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_expand "atanxf2" + [(parallel [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_dup 2) + 
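;; Illustrative note (not text from the file): fptan computes tan(x)
;; into st(1) and then pushes 1.0, which is what operand 3 models
;; (standard_80387_constant_p == 2 is the fld1 constant).  The tan
;; expanders above simply discard that "one" result, matching the
;; classic sequence:
;;
;;	fptan			# st(1) = tan(x), st(0) = 1.0
;;	fstp	%st(0)		# drop the 1.0, leaving tan(x)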
+(define_insn "*fpatanxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")
+		    (match_operand:XF 2 "register_operand" "u")]
+		   UNSPEC_FPATAN))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fpatan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fpatan_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))
+		    (float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" "u"))]
+		   UNSPEC_FPATAN))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fpatan"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "atan2xf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 2 "register_operand" "")
+			       (match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_FPATAN))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "")
+
+(define_expand "atan2<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "atanxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 2)
+			       (match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_FPATAN))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "atan<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (<MODE>mode);
+  emit_move_insn (op2, CONST1_RTX (<MODE>mode));  /* fld1 */
+
+  emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "asinxf2"
+  [(set (match_dup 2)
+	(mult:XF (match_operand:XF 1 "register_operand" "")
+		 (match_dup 1)))
+   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+   (set (match_dup 5) (sqrt:XF (match_dup 4)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 5) (match_dup 1)]
+			      UNSPEC_FPATAN))
+	      (clobber (match_scratch:XF 6 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 2; i < 6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[3], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "asin<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_asinxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "acosxf2"
+  [(set (match_dup 2)
+	(mult:XF (match_operand:XF 1 "register_operand" "")
+		 (match_dup 1)))
+   (set (match_dup 4) (minus:XF (match_dup 3) (match_dup 2)))
+   (set (match_dup 5) (sqrt:XF (match_dup 4)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 1) (match_dup 5)]
+			      UNSPEC_FPATAN))
+	      (clobber (match_scratch:XF 6 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 2; i < 6; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[3], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "acos<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_acosxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn "fyl2xxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")
+		    (match_operand:XF 2 "register_operand" "u")]
+		   UNSPEC_FYL2X))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fyl2x"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fyl2x_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))
+		    (match_operand:XF 2 "register_operand" "u")]
+		   UNSPEC_FYL2X))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fyl2x"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "logxf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)] UNSPEC_FYL2X))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], standard_80387_constant_rtx (4));  /* fldln2 */
+})
+
+(define_expand "log<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (4));  /* fldln2 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "log10xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)] UNSPEC_FYL2X))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], standard_80387_constant_rtx (3));  /* fldlg2 */
+})
+
+(define_expand "log10<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (3));  /* fldlg2 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "log2xf2"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)] UNSPEC_FYL2X))
+	      (clobber (match_scratch:XF 3 ""))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+  emit_move_insn (operands[2], CONST1_RTX (XFmode));  /* fld1 */
+})
+
+(define_expand "log2<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+
+  rtx op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, CONST1_RTX (XFmode));  /* fld1 */
+
+  emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn "fyl2xp1xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")
+		    (match_operand:XF 2 "register_operand" "u")]
+		   UNSPEC_FYL2XP1))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fyl2xp1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fyl2xp1_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 1 "register_operand" "0"))
+		    (match_operand:XF 2 "register_operand" "u")]
+		   UNSPEC_FYL2XP1))
+   (clobber (match_scratch:XF 3 "=2"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fyl2xp1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "log1pxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  ix86_emit_i387_log1p (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "log1p<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+
+  operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
+
+  ix86_emit_i387_log1p (op0, operands[1]);
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_insn "fxtractxf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
+		   UNSPEC_XTRACT_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2)] UNSPEC_XTRACT_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fxtract"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "fxtract_extend<mode>xf3_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(float_extend:XF
+		      (match_operand:MODEF 2 "register_operand" "0"))]
+		   UNSPEC_XTRACT_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "fxtract"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "logbxf2"
+  [(parallel [(set (match_dup 2)
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")]
+			      UNSPEC_XTRACT_FRACT))
+	      (set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  operands[2] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "logb<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op1));
+  DONE;
+})
+
+(define_expand "ilogbxf2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+  DONE;
+})
+
+(define_expand "ilogb<mode>2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+  emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+  DONE;
+})
+
+(define_insn "*f2xm1xf2_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   UNSPEC_F2XM1))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "f2xm1"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_insn "*fscalexf4_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 2 "register_operand" "0")
+		    (match_operand:XF 3 "register_operand" "1")]
+		   UNSPEC_FSCALE_FRACT))
+   (set (match_operand:XF 1 "register_operand" "=u")
+	(unspec:XF [(match_dup 2) (match_dup 3)]
+		   UNSPEC_FSCALE_EXP))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fscale"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "expNcorexf3"
+  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+			       (match_operand:XF 2 "register_operand" "")))
+   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (set (match_dup 8) (plus:XF (match_dup 6) (match_dup 7)))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_dup 8) (match_dup 4)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 9)
+		   (unspec:XF [(match_dup 8) (match_dup 4)]
+			      UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 3; i < 10; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  emit_move_insn (operands[7], CONST1_RTX (XFmode));  /* fld1 */
+})
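;; For orientation (a reconstruction of the math, not text from the
;; file): expNcorexf3 evaluates a^x through the 2^t identity, with the
;; base selected by the log2 constant passed in by its callers
;; (fldl2e for e, fldl2t for 10, fld1 for 2):
;;
;;   t   = x * log2(a)			(operand 3)
;;   n   = frndint (t)			(operand 4, integer part)
;;   f   = t - n			(operand 5, |f| <= 1/2)
;;   a^x = (f2xm1 (f) + 1.0) * 2^n	(f2xm1, then fscale by n)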
+(define_expand "expxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (5));  /* fldl2e */
+
+  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "exp<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_expxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "exp10xf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, standard_80387_constant_rtx (6));  /* fldl2t */
+
+  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "exp10<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_exp10xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "exp2xf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
+  emit_move_insn (op2, CONST1_RTX (XFmode));  /* fld1 */
+
+  emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
+  DONE;
+})
+
+(define_expand "exp2<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_exp2xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "expm1xf2"
+  [(set (match_dup 3) (mult:XF (match_operand:XF 1 "register_operand" "")
+			       (match_dup 2)))
+   (set (match_dup 4) (unspec:XF [(match_dup 3)] UNSPEC_FRNDINT))
+   (set (match_dup 5) (minus:XF (match_dup 3) (match_dup 4)))
+   (set (match_dup 9) (float_extend:XF (match_dup 13)))
+   (set (match_dup 6) (unspec:XF [(match_dup 5)] UNSPEC_F2XM1))
+   (parallel [(set (match_dup 7)
+		   (unspec:XF [(match_dup 6) (match_dup 4)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 8)
+		   (unspec:XF [(match_dup 6) (match_dup 4)]
+			      UNSPEC_FSCALE_EXP))])
+   (parallel [(set (match_dup 10)
+		   (unspec:XF [(match_dup 9) (match_dup 8)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 11)
+		   (unspec:XF [(match_dup 9) (match_dup 8)]
+			      UNSPEC_FSCALE_EXP))])
+   (set (match_dup 12) (minus:XF (match_dup 10)
+				 (float_extend:XF (match_dup 13))))
+   (set (match_operand:XF 0 "register_operand" "")
+	(plus:XF (match_dup 12) (match_dup 7)))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  for (i = 2; i < 13; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+
+  operands[13]
+    = validize_mem (force_const_mem (SFmode, CONST1_RTX (SFmode)));  /* fld1 */
+
+  emit_move_insn (operands[2], standard_80387_constant_rtx (5));  /* fldl2e */
+})
+
+(define_expand "expm1<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_expm1xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "ldexpxf3"
+  [(set (match_dup 3)
+	(float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_dup 3)]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 4)
+		   (unspec:XF [(match_dup 1) (match_dup 3)]
+			      UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  operands[3] = gen_reg_rtx (XFmode);
+  operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexp<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:SI 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+(define_expand "scalbxf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
+		   (unspec:XF [(match_operand:XF 1 "register_operand" "")
+			       (match_operand:XF 2 "register_operand" "")]
+			      UNSPEC_FSCALE_FRACT))
+	      (set (match_dup 3)
+		   (unspec:XF [(match_dup 1) (match_dup 2)]
+			      UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  operands[3] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "scalb<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0, op1, op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+  op2 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_scalbxf3 (op0, op1, op2));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+
+(define_insn "sse4_1_round<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")
+		       (match_operand:SI 2 "const_0_to_15_operand" "n")]
+		      UNSPEC_ROUND))]
+  "TARGET_ROUND"
+  "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "rintxf2"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   UNSPEC_FRNDINT))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "frndint"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "XF")])
+
+(define_expand "rint<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math)
+    {
+      if (!TARGET_ROUND && optimize_insn_for_size_p ())
+	FAIL;
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x04)));
+      else
+	ix86_expand_rint (operand0, operand1);
+    }
+  else
+    {
+      rtx op0 = gen_reg_rtx (XFmode);
+      rtx op1 = gen_reg_rtx (XFmode);
+
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_rintxf2 (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_expand "round<mode>2"
+  [(match_operand:MODEF 0 "register_operand" "")
+   (match_operand:MODEF 1 "nonimmediate_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math && !flag_rounding_math"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  if (TARGET_64BIT || (<MODE>mode != DFmode))
+    ix86_expand_round (operand0, operand1);
+  else
+    ix86_expand_rounddf_32 (operand0, operand1);
+  DONE;
+})
+
+(define_insn_and_split "*fistdi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fistdi2 (operands[0], operands[1]));
+  else
+    {
+      operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+      emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+					operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+		   UNSPEC_FIST))
+   (clobber (match_scratch:XF 2 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+		   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+	      (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+	      (clobber (match_dup 3))])]
+  "")
+
+(define_insn_and_split "*fist<mode>2_1"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+  emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+					operands[2]));
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+			   UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_with_temp"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+			   UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+  "TARGET_USE_FANCY_MATH_387"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+  "reload_completed"
+  [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+  "reload_completed"
+  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]
+  "")
+
+(define_expand "lrintxf<mode>2"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+			 UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387"
+  "")
+
+(define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2"
+  [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+	(unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")]
+			   UNSPEC_FIX_NOTRUNC))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)"
+  "")
+
+(define_expand "lround<MODEF:mode><SSEMODEI24:mode>2"
+  [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)
+   && !flag_trapping_math && !flag_rounding_math"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  ix86_expand_lround (operand0, operand1);
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_floor"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FRNDINT_FLOOR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+
+  emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1],
+					operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_floor_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   UNSPEC_FRNDINT_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "XF")])
+
+(define_expand "floorxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "floor<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+    {
+      if (!TARGET_ROUND && optimize_insn_for_size_p ())
+	FAIL;
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x01)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
+	ix86_expand_floorceil (operand0, operand1, true);
+      else
+	ix86_expand_floorceildf_32 (operand0, operand1, true);
+    }
+  else
+    {
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+	FAIL;
+
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_floor (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_floor_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+			 UNSPEC_FIST_FLOOR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_FLOOR] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_floor (operands[0], operands[1],
+				      operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1],
+						  operands[2], operands[3],
+						  operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_floor"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+		   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_floor_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+		   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])]
+  "")
+
+(define_insn "fist<mode>2_floor"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+			   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_floor_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+			   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+						    UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+						    UNSPEC_FIST_FLOOR))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])]
+  "")
+
+(define_expand "lfloorxf<mode>2"
+  [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+				    UNSPEC_FIST_FLOOR))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "")
+
+(define_expand "lfloor<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+(define_expand "lfloor<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  if (optimize_insn_for_size_p () && TARGET_64BIT)
+    FAIL;
+  ix86_expand_lfloorceil (operand0, operand1, true);
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_ceil"
+  [(set (match_operand:XF 0 "register_operand" "")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FRNDINT_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+  emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+				       operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_insn "frndintxf2_ceil_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+	(unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+		   UNSPEC_FRNDINT_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+(define_expand "ceilxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  if (optimize_insn_for_size_p ())
+    FAIL;
+  emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+  DONE;
+})
+
+(define_expand "ceil<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+	|| TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math)"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
+    {
+      if (TARGET_ROUND)
+	emit_insn (gen_sse4_1_round<mode>2
+		   (operands[0], operands[1], GEN_INT (0x02)));
+      else if (optimize_insn_for_size_p ())
+	FAIL;
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
+	ix86_expand_floorceil (operand0, operand1, false);
+      else
+	ix86_expand_floorceildf_32 (operand0, operand1, false);
+    }
+  else
+    {
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+	FAIL;
+
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_frndintxf2_ceil (op0, op1));
+
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+    }
+  DONE;
+})
+
+(define_insn_and_split "*fist<mode>2_ceil_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+	(unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+			 UNSPEC_FIST_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+				     operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+						 operands[2], operands[3],
+						 operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fistdi2_ceil"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+		   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+(define_insn "fistdi2_ceil_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+		   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:DI 0 "memory_operand" "")
+	(unspec:DI [(match_operand:XF 1 "register_operand" "")]
+		   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))
+	      (clobber (match_dup 5))])]
+  "")
+
+(define_insn "fist<mode>2_ceil"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+			   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fist<mode>2_ceil_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+			   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+						    UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+	(unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+			   UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+						    UNSPEC_FIST_CEIL))
+	      (use (match_dup 2))
+	      (use (match_dup 3))])]
+  "")
+
+(define_expand "lceilxf<mode>2"
+  [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+		   (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+				    UNSPEC_FIST_CEIL))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "")
+
+(define_expand "lceil<mode>di2"
+  [(match_operand:DI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+(define_expand "lceil<mode>si2"
+  [(match_operand:SI 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !flag_trapping_math"
+{
+  ix86_expand_lfloorceil (operand0, operand1, false);
+  DONE;
+})
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+(define_insn_and_split "frndintxf2_trunc" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_TRUNC)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_TRUNC] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC); + + emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_trunc_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_TRUNC)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "trunc") + (set_attr "mode" "XF")]) + +(define_expand "btruncxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + if (optimize_insn_for_size_p ()) + FAIL; + emit_insn (gen_frndintxf2_trunc (operands[0], operands[1])); + DONE; +}) + +(define_expand "btrunc2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "(TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math)" +{ + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH + && !flag_trapping_math + && (TARGET_ROUND || optimize_insn_for_speed_p ())) + { + if (TARGET_ROUND) + emit_insn (gen_sse4_1_round2 + (operands[0], operands[1], GEN_INT (0x03))); + else if (optimize_insn_for_size_p ()) + FAIL; + else if (TARGET_64BIT || (mode != DFmode)) + ix86_expand_trunc (operand0, operand1); + else + ix86_expand_truncdf_32 (operand0, operand1); + } + else + { + rtx op0, op1; + + if (optimize_insn_for_size_p ()) + FAIL; + + op0 = gen_reg_rtx (XFmode); + op1 = gen_reg_rtx (XFmode); + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_trunc (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + } + DONE; +}) + +;; Rounding mode control word calculation could clobber FLAGS_REG. 
+(define_insn_and_split "frndintxf2_mask_pm" + [(set (match_operand:XF 0 "register_operand" "") + (unspec:XF [(match_operand:XF 1 "register_operand" "")] + UNSPEC_FRNDINT_MASK_PM)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(const_int 0)] +{ + ix86_optimize_mode_switching[I387_MASK_PM] = 1; + + operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED); + operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM); + + emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1], + operands[2], operands[3])); + DONE; +} + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_insn "frndintxf2_mask_pm_i387" + [(set (match_operand:XF 0 "register_operand" "=f") + (unspec:XF [(match_operand:XF 1 "register_operand" "0")] + UNSPEC_FRNDINT_MASK_PM)) + (use (match_operand:HI 2 "memory_operand" "m")) + (use (match_operand:HI 3 "memory_operand" "m"))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" + "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2" + [(set_attr "type" "frndint") + (set_attr "i387_cw" "mask_pm") + (set_attr "mode" "XF")]) + +(define_expand "nearbyintxf2" + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && flag_unsafe_math_optimizations" +{ + emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1])); + + DONE; +}) + +(define_expand "nearbyint2" + [(use (match_operand:MODEF 0 "register_operand" "")) + (use (match_operand:MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" +{ + rtx op0 = gen_reg_rtx (XFmode); + rtx op1 = gen_reg_rtx (XFmode); + + emit_insn (gen_extendxf2 (op1, operands[1])); + emit_insn (gen_frndintxf2_mask_pm (op0, op1)); + + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; +}) + +(define_insn "fxam2_i387" + [(set (match_operand:HI 0 "register_operand" "=a") + (unspec:HI + [(match_operand:X87MODEF 1 "register_operand" "f")] + UNSPEC_FXAM))] + "TARGET_USE_FANCY_MATH_387" + "fxam\n\tfnstsw\t%0" + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_insn_and_split "fxam2_i387_with_temp" + [(set (match_operand:HI 0 "register_operand" "") + (unspec:HI + [(match_operand:MODEF 1 "memory_operand" "")] + UNSPEC_FXAM_MEM))] + "TARGET_USE_FANCY_MATH_387 + && !(reload_completed || reload_in_progress)" + "#" + "&& 1" + [(set (match_dup 2)(match_dup 1)) + (set (match_dup 0) + (unspec:HI [(match_dup 2)] UNSPEC_FXAM))] +{ + operands[2] = gen_reg_rtx (mode); + + MEM_VOLATILE_P (operands[1]) = 1; +} + [(set_attr "type" "multi") + (set_attr "unit" "i387") + (set_attr "mode" "")]) + +(define_expand "isinfxf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && TARGET_C99_FUNCTIONS" +{ + rtx mask = GEN_INT (0x45); + rtx val = GEN_INT (0x05); + + rtx cond; + + rtx scratch = gen_reg_rtx (HImode); + rtx res = gen_reg_rtx (QImode); + + emit_insn (gen_fxamxf2_i387 (scratch, operands[1])); + + emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_cmpqi_ext_3 (scratch, val)); + cond = gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, res, cond)); + emit_insn (gen_zero_extendqisi2 
(operands[0], res)); + DONE; +}) + +(define_expand "isinf2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:MODEF 1 "nonimmediate_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && TARGET_C99_FUNCTIONS + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x45); + rtx val = GEN_INT (0x05); + + rtx cond; + + rtx scratch = gen_reg_rtx (HImode); + rtx res = gen_reg_rtx (QImode); + + /* Remove excess precision by forcing value through memory. */ + if (memory_operand (operands[1], VOIDmode)) + emit_insn (gen_fxam2_i387_with_temp (scratch, operands[1])); + else + { + int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL; + rtx temp = assign_386_stack_local (mode, slot); + + emit_move_insn (temp, operands[1]); + emit_insn (gen_fxam2_i387_with_temp (scratch, temp)); + } + + emit_insn (gen_andqi_ext_0 (scratch, scratch, mask)); + emit_insn (gen_cmpqi_ext_3 (scratch, val)); + cond = gen_rtx_fmt_ee (EQ, QImode, + gen_rtx_REG (CCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, res, cond)); + emit_insn (gen_zero_extendqisi2 (operands[0], res)); + DONE; +}) + +(define_expand "signbit2" + [(use (match_operand:SI 0 "register_operand" "")) + (use (match_operand:X87MODEF 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && !(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" +{ + rtx mask = GEN_INT (0x0200); + + rtx scratch = gen_reg_rtx (HImode); + + emit_insn (gen_fxam2_i387 (scratch, operands[1])); + emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask)); + DONE; +}) + +;; Block operation instructions + +(define_insn "cld" + [(unspec_volatile [(const_int 0)] UNSPECV_CLD)] + "" + "cld" + [(set_attr "length" "1") + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +(define_expand "movmemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "nonmemory_operand" "")) + (use (match_operand:SI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "" +{ + if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +(define_expand "movmemdi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:DI 2 "nonmemory_operand" "")) + (use (match_operand:DI 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "TARGET_64BIT" +{ + if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +;; Most CPUs don't like single string operations +;; Handle this case here to simplify previous expander. + +(define_expand "strmov" + [(set (match_dup 4) (match_operand 3 "memory_operand" "")) + (set (match_operand 1 "memory_operand" "") (match_dup 4)) + (parallel [(set (match_operand 0 "register_operand" "") (match_dup 5)) + (clobber (reg:CC FLAGS_REG))]) + (parallel [(set (match_operand 2 "register_operand" "") (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + rtx adjust = GEN_INT (GET_MODE_SIZE (GET_MODE (operands[1]))); + + /* If .md ever supports :P for Pmode, these can be directly + in the pattern above. 
*/ + operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); + operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); + + /* Can't use this if the user has appropriated esi or edi. */ + if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])) + { + emit_insn (gen_strmov_singleop (operands[0], operands[1], + operands[2], operands[3], + operands[5], operands[6])); + DONE; + } + + operands[4] = gen_reg_rtx (GET_MODE (operands[1])); +}) + +(define_expand "strmov_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 5 "" ""))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strmovdi_rex_1" + [(set (mem:DI (match_operand:DI 2 "register_operand" "0")) + (mem:DI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 8))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 8)))] + "TARGET_64BIT" + "movsq" + [(set_attr "type" "str") + (set_attr "mode" "DI") + (set_attr "memory" "both")]) + +(define_insn "*strmovsi_1" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 2) + (const_int 4))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 4)))] + "!TARGET_64BIT" + "movs{l|d}" + [(set_attr "type" "str") + (set_attr "mode" "SI") + (set_attr "memory" "both")]) + +(define_insn "*strmovsi_rex_1" + [(set (mem:SI (match_operand:DI 2 "register_operand" "0")) + (mem:SI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 4))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 4)))] + "TARGET_64BIT" + "movs{l|d}" + [(set_attr "type" "str") + (set_attr "mode" "SI") + (set_attr "memory" "both")]) + +(define_insn "*strmovhi_1" + [(set (mem:HI (match_operand:SI 2 "register_operand" "0")) + (mem:HI (match_operand:SI 3 "register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 2) + (const_int 2))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 2)))] + "!TARGET_64BIT" + "movsw" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "HI")]) + +(define_insn "*strmovhi_rex_1" + [(set (mem:HI (match_operand:DI 2 "register_operand" "0")) + (mem:HI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 2))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 2)))] + "TARGET_64BIT" + "movsw" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "HI")]) + +(define_insn "*strmovqi_1" + [(set (mem:QI (match_operand:SI 2 "register_operand" "0")) + (mem:QI (match_operand:SI 3 "register_operand" "1"))) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 2) + (const_int 1))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_dup 3) + (const_int 1)))] + "!TARGET_64BIT" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_insn 
"*strmovqi_rex_1" + [(set (mem:QI (match_operand:DI 2 "register_operand" "0")) + (mem:QI (match_operand:DI 3 "register_operand" "1"))) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 2) + (const_int 1))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_dup 3) + (const_int 1)))] + "TARGET_64BIT" + "movsb" + [(set_attr "type" "str") + (set_attr "memory" "both") + (set_attr "mode" "QI")]) + +(define_expand "rep_mov" + [(parallel [(set (match_operand 4 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 5 "" "")) + (set (match_operand 2 "register_operand" "") + (match_operand 6 "" "")) + (set (match_operand 1 "memory_operand" "") + (match_operand 3 "memory_operand" "")) + (use (match_dup 4))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*rep_movdi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (ashift:DI (match_dup 5) (const_int 3)) + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "TARGET_64BIT" + "rep movsq" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "DI")]) + +(define_insn "*rep_movsi" + [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (ashift:SI (match_operand:SI 5 "register_operand" "2") + (const_int 2)) + (match_operand:SI 3 "register_operand" "0"))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (ashift:SI (match_dup 5) (const_int 2)) + (match_operand:SI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "!TARGET_64BIT" + "rep movs{l|d}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "*rep_movsi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 5 "register_operand" "2") + (const_int 2)) + (match_operand:DI 3 "register_operand" "0"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (ashift:DI (match_dup 5) (const_int 2)) + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "TARGET_64BIT" + "rep movs{l|d}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "*rep_movqi" + [(set (match_operand:SI 2 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 5 "register_operand" "2"))) + (set (match_operand:SI 1 "register_operand" "=S") + (plus:SI (match_operand:SI 4 "register_operand" "1") (match_dup 5))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "!TARGET_64BIT" + "rep movsb" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_insn "*rep_movqi_rex64" + [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0)) + (set 
(match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_operand:DI 3 "register_operand" "0") + (match_operand:DI 5 "register_operand" "2"))) + (set (match_operand:DI 1 "register_operand" "=S") + (plus:DI (match_operand:DI 4 "register_operand" "1") (match_dup 5))) + (set (mem:BLK (match_dup 3)) + (mem:BLK (match_dup 4))) + (use (match_dup 5))] + "TARGET_64BIT" + "rep movsb" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "both") + (set_attr "mode" "SI")]) + +(define_expand "setmemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:SI 1 "nonmemory_operand" "")) + (use (match_operand 2 "const_int_operand" "")) + (use (match_operand 3 "const_int_operand" "")) + (use (match_operand:SI 4 "const_int_operand" "")) + (use (match_operand:SI 5 "const_int_operand" ""))] + "" +{ + if (ix86_expand_setmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +(define_expand "setmemdi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:DI 1 "nonmemory_operand" "")) + (use (match_operand 2 "const_int_operand" "")) + (use (match_operand 3 "const_int_operand" "")) + (use (match_operand 4 "const_int_operand" "")) + (use (match_operand 5 "const_int_operand" ""))] + "TARGET_64BIT" +{ + if (ix86_expand_setmem (operands[0], operands[1], + operands[2], operands[3], + operands[4], operands[5])) + DONE; + else + FAIL; +}) + +;; Most CPUs don't like single string operations +;; Handle this case here to simplify previous expander. + +(define_expand "strset" + [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (parallel [(set (match_operand 0 "register_operand" "") + (match_dup 3)) + (clobber (reg:CC FLAGS_REG))])] + "" +{ + if (GET_MODE (operands[1]) != GET_MODE (operands[2])) + operands[1] = adjust_address_nv (operands[1], GET_MODE (operands[2]), 0); + + /* If .md ever supports :P for Pmode, this can be directly + in the pattern above. 
*/ + operands[3] = gen_rtx_PLUS (Pmode, operands[0], + GEN_INT (GET_MODE_SIZE (GET_MODE + (operands[2])))); + if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + { + emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2], + operands[3])); + DONE; + } +}) + +(define_expand "strset_singleop" + [(parallel [(set (match_operand 1 "memory_operand" "") + (match_operand 2 "register_operand" "")) + (set (match_operand 0 "register_operand" "") + (match_operand 3 "" ""))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strsetdi_rex_1" + [(set (mem:DI (match_operand:DI 1 "register_operand" "0")) + (match_operand:DI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 8)))] + "TARGET_64BIT" + "stosq" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*strsetsi_1" + [(set (mem:SI (match_operand:SI 1 "register_operand" "0")) + (match_operand:SI 2 "register_operand" "a")) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 1) + (const_int 4)))] + "!TARGET_64BIT" + "stos{l|d}" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*strsetsi_rex_1" + [(set (mem:SI (match_operand:DI 1 "register_operand" "0")) + (match_operand:SI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 4)))] + "TARGET_64BIT" + "stos{l|d}" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*strsethi_1" + [(set (mem:HI (match_operand:SI 1 "register_operand" "0")) + (match_operand:HI 2 "register_operand" "a")) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 1) + (const_int 2)))] + "!TARGET_64BIT" + "stosw" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "*strsethi_rex_1" + [(set (mem:HI (match_operand:DI 1 "register_operand" "0")) + (match_operand:HI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 2)))] + "TARGET_64BIT" + "stosw" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "HI")]) + +(define_insn "*strsetqi_1" + [(set (mem:QI (match_operand:SI 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "a")) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_dup 1) + (const_int 1)))] + "!TARGET_64BIT" + "stosb" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "*strsetqi_rex_1" + [(set (mem:QI (match_operand:DI 1 "register_operand" "0")) + (match_operand:QI 2 "register_operand" "a")) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_dup 1) + (const_int 1)))] + "TARGET_64BIT" + "stosb" + [(set_attr "type" "str") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_expand "rep_stos" + [(parallel [(set (match_operand 1 "register_operand" "") (const_int 0)) + (set (match_operand 0 "register_operand" "") + (match_operand 4 "" "")) + (set (match_operand 2 "memory_operand" "") (const_int 0)) + (use (match_operand 3 "register_operand" "")) + (use (match_dup 1))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*rep_stosdi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" 
"1") + (const_int 3)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:DI 2 "register_operand" "a")) + (use (match_dup 4))] + "TARGET_64BIT" + "rep stosq" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "DI")]) + +(define_insn "*rep_stossi" + [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (ashift:SI (match_operand:SI 4 "register_operand" "1") + (const_int 2)) + (match_operand:SI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4))] + "!TARGET_64BIT" + "rep stos{l|d}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*rep_stossi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (ashift:DI (match_operand:DI 4 "register_operand" "1") + (const_int 2)) + (match_operand:DI 3 "register_operand" "0"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:SI 2 "register_operand" "a")) + (use (match_dup 4))] + "TARGET_64BIT" + "rep stos{l|d}" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "SI")]) + +(define_insn "*rep_stosqi" + [(set (match_operand:SI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:SI 0 "register_operand" "=D") + (plus:SI (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4))] + "!TARGET_64BIT" + "rep stosb" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_insn "*rep_stosqi_rex64" + [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0)) + (set (match_operand:DI 0 "register_operand" "=D") + (plus:DI (match_operand:DI 3 "register_operand" "0") + (match_operand:DI 4 "register_operand" "1"))) + (set (mem:BLK (match_dup 3)) + (const_int 0)) + (use (match_operand:QI 2 "register_operand" "a")) + (use (match_dup 4))] + "TARGET_64BIT" + "rep stosb" + [(set_attr "type" "str") + (set_attr "prefix_rep" "1") + (set_attr "memory" "store") + (set_attr "mode" "QI")]) + +(define_expand "cmpstrnsi" + [(set (match_operand:SI 0 "register_operand" "") + (compare:SI (match_operand:BLK 1 "general_operand" "") + (match_operand:BLK 2 "general_operand" ""))) + (use (match_operand 3 "general_operand" "")) + (use (match_operand 4 "immediate_operand" ""))] + "" +{ + rtx addr1, addr2, out, outlow, count, countreg, align; + + if (!TARGET_INLINE_COMPARES) + FAIL; + + if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) + FAIL; + + /* Can't use this if the user has appropriated esi or edi. 
*/ + if (fixed_regs[SI_REG] || fixed_regs[DI_REG]) + FAIL; + + out = operands[0]; + if (!REG_P (out)) + out = gen_reg_rtx (SImode); + + addr1 = copy_to_mode_reg (Pmode, XEXP (operands[1], 0)); + addr2 = copy_to_mode_reg (Pmode, XEXP (operands[2], 0)); + if (addr1 != XEXP (operands[1], 0)) + operands[1] = replace_equiv_address_nv (operands[1], addr1); + if (addr2 != XEXP (operands[2], 0)) + operands[2] = replace_equiv_address_nv (operands[2], addr2); + + count = operands[3]; + countreg = ix86_zero_extend_to_Pmode (count); + + /* %%% Iff we are testing strict equality, we can use known alignment + to good advantage. This may be possible with combine, particularly + once cc0 is dead. */ + align = operands[4]; + + if (CONST_INT_P (count)) + { + if (INTVAL (count) == 0) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); + } + else + { + if (TARGET_64BIT) + emit_insn (gen_cmpdi_1_rex64 (countreg, countreg)); + else + emit_insn (gen_cmpsi_1 (countreg, countreg)); + emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align, + operands[1], operands[2])); + } + + outlow = gen_lowpart (QImode, out); + emit_insn (gen_cmpintqi (outlow)); + emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow)); + + if (operands[0] != out) + emit_move_insn (operands[0], out); + + DONE; +}) + +;; Produce a tri-state integer (-1, 0, 1) from condition codes. + +(define_expand "cmpintqi" + [(set (match_dup 1) + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_dup 2) + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (parallel [(set (match_operand:QI 0 "register_operand" "") + (minus:QI (match_dup 1) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "" + "operands[1] = gen_reg_rtx (QImode); + operands[2] = gen_reg_rtx (QImode);") + +;; memcmp recognizers. The `cmpsb' opcode does nothing if the count is +;; zero. Emit extra code to make sure that a zero-length compare is EQ. 
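+;;
+;; In C terms, cmpintqi folds the flags left by `repz cmpsb' into the
+;; usual memcmp-style tri-state; for example (illustrative only, the
+;; helper name is made up):
+;;
+;;   int tristate_from_bytes (unsigned char a, unsigned char b)
+;;   {
+;;     return (a > b) - (a < b);   /* 1, 0 or -1, like seta/setb/sub  */
+;;   }
+;;
+;; and the cmpstrnsi expander above special-cases a constant count of
+;; zero by storing 0 directly, since `repz cmpsb' would leave the flags
+;; untouched in that case.
+;;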
+ +(define_expand "cmpstrnqi_nz_1" + [(parallel [(set (reg:CC FLAGS_REG) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" ""))) + (use (match_operand 2 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*cmpstrnqi_nz_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) + (mem:BLK (match_operand:SI 5 "register_operand" "1")))) + (use (match_operand:SI 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_operand:SI 0 "register_operand" "=S")) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (match_operand:SI 2 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "*cmpstrnqi_nz_rex_1" + [(set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) + (mem:BLK (match_operand:DI 5 "register_operand" "1")))) + (use (match_operand:DI 6 "register_operand" "2")) + (use (match_operand:SI 3 "immediate_operand" "i")) + (clobber (match_operand:DI 0 "register_operand" "=S")) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (match_operand:DI 2 "register_operand" "=c"))] + "TARGET_64BIT" + "repz cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; The same, but the count is not known to not be zero. + +(define_expand "cmpstrnqi_1" + [(parallel [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 2 "register_operand" "") + (const_int 0)) + (compare:CC (match_operand 4 "memory_operand" "") + (match_operand 5 "memory_operand" "")) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_dup 2))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*cmpstrnqi_1" + [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0")) + (mem:BLK (match_operand:SI 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand:SI 0 "register_operand" "=S")) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (match_operand:SI 2 "register_operand" "=c"))] + "!TARGET_64BIT" + "repz cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "*cmpstrnqi_rex_1" + [(set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2") + (const_int 0)) + (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0")) + (mem:BLK (match_operand:DI 5 "register_operand" "1"))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "i")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand:DI 0 "register_operand" "=S")) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (match_operand:DI 2 "register_operand" "=c"))] + "TARGET_64BIT" + "repz cmpsb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_expand "strlensi" + [(set 
(match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] + "" +{ + if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "strlendi" + [(set (match_operand:DI 0 "register_operand" "") + (unspec:DI [(match_operand:BLK 1 "general_operand" "") + (match_operand:QI 2 "immediate_operand" "") + (match_operand 3 "immediate_operand" "")] UNSPEC_SCAS))] + "" +{ + if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3])) + DONE; + else + FAIL; +}) + +(define_expand "strlenqi_1" + [(parallel [(set (match_operand 0 "register_operand" "") (match_operand 2 "" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (reg:CC FLAGS_REG))])] + "" + "ix86_current_function_needs_cld = 1;") + +(define_insn "*strlenqi_1" + [(set (match_operand:SI 0 "register_operand" "=&c") + (unspec:SI [(mem:BLK (match_operand:SI 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:SI 3 "immediate_operand" "i") + (match_operand:SI 4 "register_operand" "0")] UNSPEC_SCAS)) + (clobber (match_operand:SI 1 "register_operand" "=D")) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT" + "repnz scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +(define_insn "*strlenqi_rex_1" + [(set (match_operand:DI 0 "register_operand" "=&c") + (unspec:DI [(mem:BLK (match_operand:DI 5 "register_operand" "1")) + (match_operand:QI 2 "register_operand" "a") + (match_operand:DI 3 "immediate_operand" "i") + (match_operand:DI 4 "register_operand" "0")] UNSPEC_SCAS)) + (clobber (match_operand:DI 1 "register_operand" "=D")) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "repnz scasb" + [(set_attr "type" "str") + (set_attr "mode" "QI") + (set_attr "prefix_rep" "1")]) + +;; Peephole optimizations to clean up after cmpstrn*. This should be +;; handled in combine, but it is not currently up to the task. +;; When used for their truth value, the cmpstrn* expanders generate +;; code like this: +;; +;; repz cmpsb +;; seta %al +;; setb %dl +;; cmpb %al, %dl +;; jcc label +;; +;; The intermediate three instructions are unnecessary. + +;; This one handles cmpstrn*_nz_1... +(define_peephole2 + [(parallel[ + (set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" "")))) + (use (match_operand 6 "register_operand" "")) + (use (match_operand:SI 3 "immediate_operand" "")) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC FLAGS_REG) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5)))) + (use (match_dup 6)) + (use (match_dup 3)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] + "") + +;; ...and this one handles cmpstrn*_1. 
+(define_peephole2 + [(parallel[ + (set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_operand 6 "register_operand" "") + (const_int 0)) + (compare:CC (mem:BLK (match_operand 4 "register_operand" "")) + (mem:BLK (match_operand 5 "register_operand" ""))) + (const_int 0))) + (use (match_operand:SI 3 "immediate_operand" "")) + (use (reg:CC FLAGS_REG)) + (clobber (match_operand 0 "register_operand" "")) + (clobber (match_operand 1 "register_operand" "")) + (clobber (match_operand 2 "register_operand" ""))]) + (set (match_operand:QI 7 "register_operand" "") + (gtu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (match_operand:QI 8 "register_operand" "") + (ltu:QI (reg:CC FLAGS_REG) (const_int 0))) + (set (reg FLAGS_REG) + (compare (match_dup 7) (match_dup 8))) + ] + "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])" + [(parallel[ + (set (reg:CC FLAGS_REG) + (if_then_else:CC (ne (match_dup 6) + (const_int 0)) + (compare:CC (mem:BLK (match_dup 4)) + (mem:BLK (match_dup 5))) + (const_int 0))) + (use (match_dup 3)) + (use (reg:CC FLAGS_REG)) + (clobber (match_dup 0)) + (clobber (match_dup 1)) + (clobber (match_dup 2))])] + "") + + + +;; Conditional move instructions. + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "general_operand" "") + (match_operand:DI 3 "general_operand" "")))] + "TARGET_64BIT" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") + +(define_insn "x86_movdicc_0_m1_rex64" + [(set (match_operand:DI 0 "register_operand" "=r") + (if_then_else:DI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "sbb{q}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + +(define_insn "*x86_movdicc_0_m1_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{q}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "DI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movdicc_c_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (if_then_else:DI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DI 2 "nonimmediate_operand" "rm,0") + (match_operand:DI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_64BIT && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "DI")]) + +(define_expand "movsicc" + [(set (match_operand:SI 0 "register_operand" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "general_operand" "") + (match_operand:SI 3 "general_operand" "")))] + "" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") + +;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing +;; the register first winds up with `sbbl $0,reg', which is also weird. +;; So just document what we're doing explicitly. 
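+;;
+;; The `sbb' trick computes 0 - CF, so after a compare that sets the
+;; carry flag it yields an all-ones or all-zero mask.  A C sketch of the
+;; same branchless idiom (illustrative only):
+;;
+;;   unsigned mask_if_less (unsigned a, unsigned b)
+;;   {
+;;     /* cmp sets CF iff a < b; sbb r, r then materializes -CF.  */
+;;     return 0u - (a < b);   /* 0xffffffff or 0x0  */
+;;   }
+;;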
+ +(define_insn "x86_movsicc_0_m1" + [(set (match_operand:SI 0 "register_operand" "=r") + (if_then_else:SI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int -1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{l}\t%0, %0" + ; Since we don't have the proper number of operands for an alu insn, + ; fill in all the blanks. + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*x86_movsicc_0_m1_se" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "") + (const_int 1) + (const_int 0))) + (clobber (reg:CC FLAGS_REG))] + "" + "sbb{l}\t%0, %0" + [(set_attr "type" "alu") + (set_attr "pent_pair" "pu") + (set_attr "memory" "none") + (set_attr "imm_disp" "false") + (set_attr "mode" "SI") + (set_attr "length_immediate" "0")]) + +(define_insn "*movsicc_noc" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (if_then_else:SI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 2 "nonimmediate_operand" "rm,0") + (match_operand:SI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + +(define_expand "movhicc" + [(set (match_operand:HI 0 "register_operand" "") + (if_then_else:HI (match_operand 1 "comparison_operator" "") + (match_operand:HI 2 "general_operand" "") + (match_operand:HI 3 "general_operand" "")))] + "TARGET_HIMODE_MATH" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") + +(define_insn "*movhicc_noc" + [(set (match_operand:HI 0 "register_operand" "=r,r") + (if_then_else:HI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:HI 2 "nonimmediate_operand" "rm,0") + (match_operand:HI 3 "nonimmediate_operand" "0,rm")))] + "TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "HI")]) + +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "general_operand" "") + (match_operand:QI 3 "general_operand" "")))] + "TARGET_QIMODE_MATH" + "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;") + +(define_insn_and_split "*movqicc_noc" + [(set (match_operand:QI 0 "register_operand" "=r,r") + (if_then_else:QI (match_operator 1 "ix86_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:QI 2 "register_operand" "r,0") + (match_operand:QI 3 "register_operand" "0,r")))] + "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" + "#" + "&& reload_completed" + [(set (match_dup 0) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 2) + (match_dup 3)))] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = gen_lowpart (SImode, operands[3]);" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + +(define_expand "movcc" + [(set (match_operand:X87MODEF 0 "register_operand" "") + (if_then_else:X87MODEF + (match_operand 1 "comparison_operator" "") + (match_operand:X87MODEF 2 "register_operand" "") + (match_operand:X87MODEF 3 "register_operand" "")))] + 
"(TARGET_80387 && TARGET_CMOVE) + || (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)" + "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;") + +(define_insn "*movsfcc_1_387" + [(set (match_operand:SF 0 "register_operand" "=f,f,r,r") + (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "SF,SF,SI,SI")]) + +(define_insn "*movdfcc_1" + [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + # + #" + [(set_attr "type" "fcmov,fcmov,multi,multi") + (set_attr "mode" "DF")]) + +(define_insn "*movdfcc_1_rex64" + [(set (match_operand:DF 0 "register_operand" "=f,f,r,r") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0") + (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))] + "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %0|%0, %2} + cmov%O2%c1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov,fcmov,icmov,icmov") + (set_attr "mode" "DF")]) + +(define_split + [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "") + (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" + [(match_operand 4 "flags_reg_operand" "") + (const_int 0)]) + (match_operand:DF 2 "nonimmediate_operand" "") + (match_operand:DF 3 "nonimmediate_operand" "")))] + "!TARGET_64BIT && reload_completed" + [(set (match_dup 2) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 5) + (match_dup 6))) + (set (match_dup 3) + (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)]) + (match_dup 7) + (match_dup 8)))] + "split_di (&operands[2], 2, &operands[5], &operands[7]); + split_di (&operands[0], 1, &operands[2], &operands[3]);") + +(define_insn "*movxfcc_1" + [(set (match_operand:XF 0 "register_operand" "=f,f") + (if_then_else:XF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:XF 2 "register_operand" "f,0") + (match_operand:XF 3 "register_operand" "0,f")))] + "TARGET_80387 && TARGET_CMOVE" + "@ + fcmov%F1\t{%2, %0|%0, %2} + fcmov%f1\t{%3, %0|%0, %3}" + [(set_attr "type" "fcmov") + (set_attr "mode" "XF")]) + +;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict +;; the scalar versions to have only XMM registers as operands. 
+ +;; SSE5 conditional move +(define_insn "*sse5_pcmov_" + [(set (match_operand:MODEF 0 "register_operand" "=x,x") + (if_then_else:MODEF + (match_operand:MODEF 1 "register_operand" "x,0") + (match_operand:MODEF 2 "register_operand" "0,x") + (match_operand:MODEF 3 "register_operand" "x,x")))] + "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" + "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" + [(set_attr "type" "sse4arg")]) + +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). +;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "*avx_3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (smaxmin:MODEF + (match_operand:MODEF 1 "nonimmediate_operand" "%0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "s\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. + +(define_insn "*avx_ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "x") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vmins\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*ieee_smin3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MIN))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "mins\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +(define_insn "*avx_ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "AVX_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "vmaxs\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sseadd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "*ieee_smax3" + [(set (match_operand:MODEF 0 "register_operand" "=x") + (unspec:MODEF + [(match_operand:MODEF 1 "register_operand" "0") + (match_operand:MODEF 2 "nonimmediate_operand" "xm")] + UNSPEC_IEEE_MAX))] + "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" + "maxs\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "")]) + +;; Make two stack loads independent: +;; fld aa fld aa +;; fld %st(0) -> fld bb +;; fmul bb fmul %st(1), %st +;; +;; Actually we only match the last two instructions for simplicity. 
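+;;
+;; E.g. for source like
+;;
+;;   double scale (double aa, double bb) { return aa * bb; }
+;;
+;; replacing the copy of %st(0) with an independent load of `bb' lets
+;; the two fld instructions issue in parallel instead of serializing on
+;; the register copy.  As the peephole body notes, the `%' commutativity
+;; marker is not honored in peephole2s, so the replacement fixes the
+;; operand order by hand, keeping the original order for the
+;; non-commutative operators.
+;;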
+(define_peephole2 + [(set (match_operand 0 "fp_register_operand" "") + (match_operand 1 "fp_register_operand" "")) + (set (match_dup 0) + (match_operator 2 "binary_fp_operator" + [(match_dup 0) + (match_operand 3 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (match_dup 4))] + + ;; The % modifier is not operational anymore in peephole2's, so we have to + ;; swap the operands manually in the case of addition and multiplication. + "if (COMMUTATIVE_ARITH_P (operands[2])) + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[0], operands[1]); + else + operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]), + operands[1], operands[0]);") + +;; Conditional addition patterns +(define_expand "addcc" + [(match_operand:SWI 0 "register_operand" "") + (match_operand 1 "comparison_operator" "") + (match_operand:SWI 2 "register_operand" "") + (match_operand:SWI 3 "const_int_operand" "")] + "" + "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") + + +;; Misc patterns (?) + +;; This pattern exists to put a dependency on all ebp-based memory accesses. +;; Otherwise there will be nothing to keep +;; +;; [(set (reg ebp) (reg esp))] +;; [(set (reg esp) (plus (reg esp) (const_int -160000))) +;; (clobber (eflags)] +;; [(set (mem (plus (reg ebp) (const_int -160000))) (const_int 0))] +;; +;; in proper program order. +(define_insn "pro_epilogue_adjust_stack_1" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI (match_operand:SI 1 "register_operand" "0,r") + (match_operand:SI 2 "immediate_operand" "i,i"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "!TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + return "mov{l}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + if (CONST_INT_P (operands[2]) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{l}\t{%2, %0|%0, %2}"; + } + return "add{l}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{l}\t{%a2, %0|%0, %a2}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0") + (const_string "alu") + (match_operand:SI 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set_attr "mode" "SI")]) + +(define_insn "pro_epilogue_adjust_stack_rex64" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 2 "x86_64_immediate_operand" "e,e"))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_IMOV: + return "mov{q}\t{%1, %0|%0, %1}"; + + case TYPE_ALU: + if (CONST_INT_P (operands[2]) + /* Avoid overflows. 
*/ + && ((INTVAL (operands[2]) & ((((unsigned int) 1) << 31) - 1))) + && (INTVAL (operands[2]) == 128 + || (INTVAL (operands[2]) < 0 + && INTVAL (operands[2]) != -128))) + { + operands[2] = GEN_INT (-INTVAL (operands[2])); + return "sub{q}\t{%2, %0|%0, %2}"; + } + return "add{q}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + default: + gcc_unreachable (); + } +} + [(set (attr "type") + (cond [(eq_attr "alternative" "0") + (const_string "alu") + (match_operand:DI 2 "const0_operand" "") + (const_string "imov") + ] + (const_string "lea"))) + (set_attr "mode" "DI")]) + +(define_insn "pro_epilogue_adjust_stack_rex64_2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI (match_operand:DI 1 "register_operand" "0,r") + (match_operand:DI 3 "immediate_operand" "i,i"))) + (use (match_operand:DI 2 "register_operand" "r,r")) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))] + "TARGET_64BIT" +{ + switch (get_attr_type (insn)) + { + case TYPE_ALU: + return "add{q}\t{%2, %0|%0, %2}"; + + case TYPE_LEA: + operands[2] = gen_rtx_PLUS (DImode, operands[1], operands[2]); + return "lea{q}\t{%a2, %0|%0, %a2}"; + + default: + gcc_unreachable (); + } +} + [(set_attr "type" "alu,lea") + (set_attr "mode" "DI")]) + +(define_insn "allocate_stack_worker_32" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")] + UNSPECV_STACK_PROBE)) + (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_64BIT && TARGET_STACK_PROBE" +{ + if (flag_pic) + return "call\t___chkstk@PLT"; + else + return "call\t___chkstk"; +} + [(set_attr "type" "multi") + (set_attr "length" "5")]) + +(define_insn "allocate_stack_worker_64" + [(set (match_operand:DI 0 "register_operand" "=a") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")] + UNSPECV_STACK_PROBE)) + (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1))) + (clobber (reg:DI R10_REG)) + (clobber (reg:DI R11_REG)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_STACK_PROBE" +{ + if (flag_pic) + return "call\t___chkstk@PLT"; + else + return "call\t___chkstk"; +} + [(set_attr "type" "multi") + (set_attr "length" "5")]) + +(define_expand "allocate_stack" + [(match_operand 0 "register_operand" "") + (match_operand 1 "general_operand" "")] + "TARGET_STACK_PROBE" +{ + rtx x; + +#ifndef CHECK_STACK_LIMIT +#define CHECK_STACK_LIMIT 0 +#endif + + if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < CHECK_STACK_LIMIT) + { + x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1], + stack_pointer_rtx, 0, OPTAB_DIRECT); + if (x != stack_pointer_rtx) + emit_move_insn (stack_pointer_rtx, x); + } + else + { + x = copy_to_mode_reg (Pmode, operands[1]); + if (TARGET_64BIT) + x = gen_allocate_stack_worker_64 (x, x); + else + x = gen_allocate_stack_worker_32 (x, x); + emit_insn (x); + } + + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; +}) + +(define_expand "builtin_setjmp_receiver" + [(label_ref (match_operand 0 "" ""))] + "!TARGET_64BIT && flag_pic" +{ +#if TARGET_MACHO + if (TARGET_MACHO) + { + rtx xops[3]; + rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM); + rtx label_rtx = gen_label_rtx (); + emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx)); + xops[0] = xops[1] = picreg; + xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx)); + 
ix86_expand_binary_operator (MINUS, SImode, xops); + } + else +#endif + emit_insn (gen_set_got (pic_offset_table_rtx)); + DONE; +}) + +;; Avoid redundant prefixes by splitting HImode arithmetic to SImode. + +(define_split + [(set (match_operand 0 "register_operand" "") + (match_operator 3 "promotable_binary_operator" + [(match_operand 1 "register_operand" "") + (match_operand 2 "aligned_operand" "")])) + (clobber (reg:CC FLAGS_REG))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ((GET_MODE (operands[0]) == HImode + && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX) + /* ??? next two lines just !satisfies_constraint_K (...) */ + || !CONST_INT_P (operands[2]) + || satisfies_constraint_K (operands[2]))) + || (GET_MODE (operands[0]) == QImode + && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))" + [(parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + if (GET_CODE (operands[3]) != ASHIFT) + operands[2] = gen_lowpart (SImode, operands[2]); + PUT_MODE (operands[3], SImode);") + +; Promote the QImode tests, as i386 has encoding of the AND +; instruction with 32-bit sign-extended immediate and thus the +; instruction size is unchanged, except in the %eax case for +; which it is increased by one byte, hence the ! optimize_size. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 2 "compare_operator" + [(and (match_operand 3 "aligned_operand" "") + (match_operand 4 "const_int_operand" "")) + (const_int 0)])) + (set (match_operand 1 "register_operand" "") + (and (match_dup 3) (match_dup 4)))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && optimize_insn_for_speed_p () + && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX) + || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode)) + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)" + [(parallel [(set (match_dup 0) + (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4)) + (const_int 0)])) + (set (match_dup 1) + (and:SI (match_dup 3) (match_dup 4)))])] +{ + operands[4] + = gen_int_mode (INTVAL (operands[4]) + & GET_MODE_MASK (GET_MODE (operands[1])), SImode); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[3] = gen_lowpart (SImode, operands[3]); +}) + +; Don't promote the QImode tests, as i386 doesn't have encoding of +; the TEST instruction with 32-bit sign-extended immediate and thus +; the instruction size would at least double, which is not what we +; want even with ! optimize_size. +(define_split + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and (match_operand:HI 2 "aligned_operand" "") + (match_operand:HI 3 "const_int_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL && reload_completed + && ! TARGET_FAST_PREFIX + && optimize_insn_for_speed_p () + /* Ensure that the operand will remain sign-extended immediate. */ + && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? 
CCNOmode : CCZmode)"
+  [(set (match_dup 0)
+	(match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+			 (const_int 0)]))]
+{
+  operands[3]
+    = gen_int_mode (INTVAL (operands[3])
+		    & GET_MODE_MASK (GET_MODE (operands[2])), SImode);
+  operands[2] = gen_lowpart (SImode, operands[2]);
+})
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(neg (match_operand 1 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode
+	       || optimize_insn_for_size_p ())))"
+  [(parallel [(set (match_dup 0)
+		   (neg:SI (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(not (match_operand 1 "register_operand" "")))]
+  "! TARGET_PARTIAL_REG_STALL && reload_completed
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode
+	       || optimize_insn_for_size_p ())))"
+  [(set (match_dup 0)
+	(not:SI (match_dup 1)))]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[1] = gen_lowpart (SImode, operands[1]);")
+
+(define_split
+  [(set (match_operand 0 "register_operand" "")
+	(if_then_else (match_operator 1 "comparison_operator"
+				[(reg FLAGS_REG) (const_int 0)])
+		      (match_operand 2 "register_operand" "")
+		      (match_operand 3 "register_operand" "")))]
+  "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
+   && (GET_MODE (operands[0]) == HImode
+       || (GET_MODE (operands[0]) == QImode
+	   && (TARGET_PROMOTE_QImode
+	       || optimize_insn_for_size_p ())))"
+  [(set (match_dup 0)
+	(if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
+  "operands[0] = gen_lowpart (SImode, operands[0]);
+   operands[2] = gen_lowpart (SImode, operands[2]);
+   operands[3] = gen_lowpart (SImode, operands[3]);")
+
+
+;; RTL peephole optimizations, run before sched2.  These primarily look to
+;; transform a complex memory operation into two memory-to-register operations.
+
+;; Don't push memory operands
+(define_peephole2
+  [(set (match_operand:SI 0 "push_operand" "")
+	(match_operand:SI 1 "memory_operand" ""))
+   (match_scratch:SI 2 "r")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "push_operand" "")
+	(match_operand:DI 1 "memory_operand" ""))
+   (match_scratch:DI 2 "r")]
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; We need to handle SFmode only, because DFmode and XFmode are split into
+;; SImode pushes.
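+;; The transformation mirrors the integer cases above; for example (the
+;; scratch register being whatever the allocator provides):
+;;   pushl 12(%esi)   ->   movl 12(%esi), %eax ; pushl %eax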
+(define_peephole2 + [(set (match_operand:SF 0 "push_operand" "") + (match_operand:SF 1 "memory_operand" "")) + (match_scratch:SF 2 "r")] + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:HI 0 "push_operand" "") + (match_operand:HI 1 "memory_operand" "")) + (match_scratch:HI 2 "r")] + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(set (match_operand:QI 0 "push_operand" "") + (match_operand:QI 1 "memory_operand" "")) + (match_scratch:QI 2 "q")] + "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY + && !RTX_FRAME_RELATED_P (peep2_next_insn (0))" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; Don't move an immediate directly to memory when the instruction +;; gets too big. +(define_peephole2 + [(match_scratch:SI 1 "r") + (set (match_operand:SI 0 "memory_operand" "") + (const_int 0))] + "optimize_insn_for_speed_p () + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 1) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "") + +(define_peephole2 + [(match_scratch:HI 1 "r") + (set (match_operand:HI 0 "memory_operand" "") + (const_int 0))] + "optimize_insn_for_speed_p () + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_lowpart (SImode, operands[1]);") + +(define_peephole2 + [(match_scratch:QI 1 "q") + (set (match_operand:QI 0 "memory_operand" "") + (const_int 0))] + "optimize_insn_for_speed_p () + && ! TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 2) (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 1))] + "operands[2] = gen_lowpart (SImode, operands[1]);") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (set (match_operand:SI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:HI 2 "r") + (set (match_operand:HI 0 "memory_operand" "") + (match_operand:HI 1 "immediate_operand" ""))] + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:QI 2 "q") + (set (match_operand:QI 0 "memory_operand" "") + (match_operand:QI 1 "immediate_operand" ""))] + "optimize_insn_for_speed_p () + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; Don't compare memory with zero, load and use a test instead. 
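+;; That is, replace the memory-operand compare with a load plus a register
+;; test, e.g.
+;;   cmpl $0, 8(%ebp)   ->   movl 8(%ebp), %ecx ; testl %ecx, %ecx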
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(match_operand:SI 2 "memory_operand" "")
+	   (const_int 0)]))
+   (match_scratch:SI 3 "r")]
+  "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
+  [(set (match_dup 3) (match_dup 2))
+   (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
+  "")
+
+;; NOT is not pairable on Pentium, while XOR is, but one byte longer.
+;; Don't split NOTs with a displacement operand, because the resulting XOR
+;; will not be pairable anyway.
+;;
+;; On AMD K6, a NOT with a memory operand that cannot be represented using
+;; a modRM byte is vector decoded.  The XOR replacement is long decoded,
+;; so this split helps here as well.
+;;
+;; Note: Can't do this as a regular split because we can't get proper
+;; lifetime information then.
+
+(define_peephole2
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	(not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+	&& (!MEM_P (operands[0])
+	    || !memory_displacement_operand (operands[0], SImode)))
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+		   (xor:SI (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+	(not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+	&& (!MEM_P (operands[0])
+	    || !memory_displacement_operand (operands[0], HImode)))
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+		   (xor:HI (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+	(not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
+  "optimize_insn_for_speed_p ()
+   && ((TARGET_NOT_UNPAIRABLE
+	&& (!MEM_P (operands[0])
+	    || !memory_displacement_operand (operands[0], QImode)))
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0)
+		   (xor:QI (match_dup 1) (const_int -1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+;; Non-pairable "test imm, reg" instructions can be translated to
+;; "and imm, reg" if reg dies.  The "and" form is also shorter (one-byte
+;; opcode instead of two, and it has a short form for byte operands),
+;; so do it for other CPUs as well.  Given that the value was dead,
+;; this should not create any new dependencies.  Pass on the sub-word
+;; versions if we're concerned about partial register stalls.
+
+(define_peephole2
+  [(set (match_operand 0 "flags_reg_operand" "")
+	(match_operator 1 "compare_operator"
+	  [(and:SI (match_operand:SI 2 "register_operand" "")
+		   (match_operand:SI 3 "immediate_operand" ""))
+	   (const_int 0)]))]
+  "ix86_match_ccmode (insn, CCNOmode)
+   && (true_regnum (operands[2]) != AX_REG
+       || satisfies_constraint_K (operands[3]))
+   && peep2_reg_dead_p (1, operands[2])"
+  [(parallel
+     [(set (match_dup 0)
+	   (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
+			    (const_int 0)]))
+      (set (match_dup 2)
+	   (and:SI (match_dup 2) (match_dup 3)))])]
+  "")
+
+;; We don't need to handle the HImode case, because it will be promoted to SImode
+;; on !
TARGET_PARTIAL_REG_STALL + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:QI (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "immediate_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[2]) != AX_REG + && peep2_reg_dead_p (1, operands[2])" + [(parallel + [(set (match_dup 0) + (match_op_dup 1 [(and:QI (match_dup 2) (match_dup 3)) + (const_int 0)])) + (set (match_dup 2) + (and:QI (match_dup 2) (match_dup 3)))])] + "") + +(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(and:SI + (zero_extract:SI + (match_operand 2 "ext_register_operand" "") + (const_int 8) + (const_int 8)) + (match_operand 3 "const_int_operand" "")) + (const_int 0)]))] + "! TARGET_PARTIAL_REG_STALL + && ix86_match_ccmode (insn, CCNOmode) + && true_regnum (operands[2]) != AX_REG + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 + [(and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)) + (const_int 0)])) + (set (zero_extract:SI (match_dup 2) + (const_int 8) + (const_int 8)) + (and:SI + (zero_extract:SI + (match_dup 2) + (const_int 8) + (const_int 8)) + (match_dup 3)))])] + "") + +;; Don't do logical operations with memory inputs. +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))])] + "") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "memory_operand" "") + (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY" + [(set (match_dup 2) (match_dup 1)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 2) (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "") + +;; Prefer Load+RegOp to Mov+MemOp. Watch out for cases when the memory address +;; refers to the destination of the load! 
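+;; For example
+;;   movl %ebx, %eax ; addl 4(%eax), %eax
+;; must keep reading from the original address, so it becomes
+;;   movl 4(%ebx), %eax ; addl %ebx, %eax
+;; (the replace_rtx call below rewrites the address to use the source register).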
+ +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "register_operand" "")) + (parallel [(set (match_dup 0) + (match_operator:SI 3 "commutative_operator" + [(match_dup 0) + (match_operand:SI 2 "memory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "REGNO (operands[0]) != REGNO (operands[1]) + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[1]))" + [(set (match_dup 0) (match_dup 4)) + (parallel [(set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))])] + "operands[4] = replace_rtx (operands[2], operands[0], operands[1]);") + +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "register_operand" "")) + (set (match_dup 0) + (match_operator 3 "commutative_operator" + [(match_dup 0) + (match_operand 2 "memory_operand" "")]))] + "REGNO (operands[0]) != REGNO (operands[1]) + && ((MMX_REG_P (operands[0]) && MMX_REG_P (operands[1])) + || (SSE_REG_P (operands[0]) && SSE_REG_P (operands[1])))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (match_op_dup 3 [(match_dup 0) (match_dup 1)]))] + "") + +; Don't do logical operations with memory outputs +; +; These two don't make sense for PPro/PII -- we're expanding a 4-uop +; instruction into two 1-uop insns plus a 2-uop insn. That last has +; the same decoder scheduling characteristics as the original. + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_dup 0) + (match_operand:SI 1 "nonmemory_operand" "")])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 2) (match_dup 1)])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 2))] + "") + +(define_peephole2 + [(match_scratch:SI 2 "r") + (parallel [(set (match_operand:SI 0 "memory_operand" "") + (match_operator:SI 3 "arith_or_logical_operator" + [(match_operand:SI 1 "nonmemory_operand" "") + (match_dup 0)])) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE" + [(set (match_dup 2) (match_dup 0)) + (parallel [(set (match_dup 2) + (match_op_dup 3 [(match_dup 1) (match_dup 2)])) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) (match_dup 2))] + "") + +;; Attempt to always use XOR for zeroing registers. +(define_peephole2 + [(set (match_operand 0 "register_operand" "") + (match_operand 1 "const0_operand" ""))] + "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) + && GENERAL_REG_P (operands[0]) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (match_dup 0) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])] +{ + operands[0] = gen_lowpart (word_mode, operands[0]); +}) + +(define_peephole2 + [(set (strict_low_part (match_operand 0 "register_operand" "")) + (const_int 0))] + "(GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode) + && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ()) + && peep2_regno_dead_p (0, FLAGS_REG)" + [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0)) + (clobber (reg:CC FLAGS_REG))])]) + +;; For HI and SI modes, or $-1,reg is smaller than mov $-1,reg. 
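+;; e.g. "orl $-1, %eax" encodes as 83 c8 ff (3 bytes), while "movl $-1, %eax"
+;; is b8 ff ff ff ff (5 bytes).  The or form clobbers the flags, hence the
+;; peep2_regno_dead_p check on FLAGS_REG below.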
+(define_peephole2
+  [(set (match_operand 0 "register_operand" "")
+	(const_int -1))]
+  "(GET_MODE (operands[0]) == HImode
+    || GET_MODE (operands[0]) == SImode
+    || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
+   && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (const_int -1))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode,
+			      operands[0]);")
+
+;; Attempt to convert simple leas to adds.  These can be created by
+;; move expanders.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(plus:SI (match_dup 0)
+		 (match_operand:SI 1 "nonmemory_operand" "")))]
+  "peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(subreg:SI (plus:DI (match_operand:DI 1 "register_operand" "")
+			    (match_operand:DI 2 "nonmemory_operand" "")) 0))]
+  "peep2_regno_dead_p (0, FLAGS_REG) && REGNO (operands[0]) == REGNO (operands[1])"
+  [(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand" "")
+	(plus:DI (match_dup 0)
+		 (match_operand:DI 1 "x86_64_general_operand" "")))]
+  "peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(mult:SI (match_dup 0)
+		 (match_operand:SI 1 "const_int_operand" "")))]
+  "exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand" "")
+	(mult:DI (match_dup 0)
+		 (match_operand:DI 1 "const_int_operand" "")))]
+  "exact_log2 (INTVAL (operands[1])) >= 0
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+	(subreg:SI (mult:DI (match_operand:DI 1 "register_operand" "")
+			    (match_operand:DI 2 "const_int_operand" "")) 0))]
+  "exact_log2 (INTVAL (operands[2])) >= 0
+   && REGNO (operands[0]) == REGNO (operands[1])
+   && peep2_regno_dead_p (0, FLAGS_REG)"
+  [(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+;; The ESP adjustments can be done by the push and pop instructions.  The
+;; resulting code is shorter, since a push is only 1 byte, while "add imm, %esp"
+;; is 3 bytes.  On many CPUs it is also faster, since special hardware to avoid
+;; esp dependencies is present.
+
+;; While some of these conversions may be done using splitters, we use peepholes
+;; in order to allow the combine_stack_adjustments pass to see nonobfuscated RTL.
+
+;; Convert prologue esp subtractions to push.
+;; We need a register to push.
In order to keep verify_flow_info happy we have
+;; two choices:
+;; - use a scratch register and clobber it in order to avoid dependencies
+;; - use an already live register
+;; We can't use the second way right now, since there is no reliable way to
+;; verify that a given register is live.  The first choice will also most
+;; likely result in fewer dependencies.  At the place of the esp adjustment
+;; it is very likely that call-clobbered registers are dead.  We may want to
+;; use the base pointer as an alternative when no register is available later.
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+   (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+	      (clobber (mem:BLK (scratch)))])])
+
+;; Convert esp subtractions to push.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
+  [(clobber (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
+   (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
+
+;; Convert epilogue deallocator to pop.
+(define_peephole2
+  [(match_scratch:SI 0 "r")
+   (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (reg:CC FLAGS_REG))
+	      (clobber (mem:BLK (scratch)))])]
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
+  [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
+	      (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
+	      (clobber (mem:BLK (scratch)))])]
+  "")
+
+;; The two pops case is tricky, since pop causes a dependency on the
+;; destination register.  We use two registers if available.
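+;; For example, with two dead scratch registers available:
+;;   addl $8, %esp   ->   popl %ecx ; popl %edx
+;; each pop is a single byte, and popping into two different registers
+;; avoids a dependence between the two pops.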
+(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p ()" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +;; Convert esp additions to pop. +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. +(define_peephole2 + [(match_scratch:SI 0 "r") + (match_scratch:SI 1 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) + (parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +(define_peephole2 + [(match_scratch:SI 0 "r") + (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p ()" + [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))]) + (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])] + "") + +;; Convert compares with 1 to shorter inc/dec operations when CF is not +;; required and register dies. Similarly for 128 to -128. 
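+;; For example, when %eax dies and CF is not needed afterwards:
+;;   cmpl $1, %eax   ->   decl %eax
+;; dec sets the same flags as the compare except CF, and the inc/dec form
+;; is shorter (1 byte instead of 3).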
+(define_peephole2 + [(set (match_operand 0 "flags_reg_operand" "") + (match_operator 1 "compare_operator" + [(match_operand 2 "register_operand" "") + (match_operand 3 "const_int_operand" "")]))] + "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_size) + && incdec_operand (operands[3], GET_MODE (operands[3]))) + || (!TARGET_FUSE_CMP_AND_BRANCH + && INTVAL (operands[3]) == 128)) + && ix86_match_ccmode (insn, CCGCmode) + && peep2_reg_dead_p (1, operands[2])" + [(parallel [(set (match_dup 0) + (match_op_dup 1 [(match_dup 2) (match_dup 3)])) + (clobber (match_dup 2))])] + "") + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (clobber (mem:BLK (scratch)))])]) + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (clobber (mem:BLK (scratch)))])]) + +;; Convert esp subtractions to push. +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4" + [(clobber (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8" + [(clobber (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0)) + (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))]) + +;; Convert epilogue deallocator to pop. +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. 
+(define_peephole2 + [(match_scratch:DI 0 "r") + (match_scratch:DI 1 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG)) + (clobber (mem:BLK (scratch)))])] + "optimize_insn_for_size_p ()" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (mem:BLK (scratch)))]) + (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +;; Convert esp additions to pop. +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +;; Two pops case is tricky, since pop causes dependency on destination register. +;; We use two registers if available. +(define_peephole2 + [(match_scratch:DI 0 "r") + (match_scratch:DI 1 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG))])] + "" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) + (parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +(define_peephole2 + [(match_scratch:DI 0 "r") + (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_size_p ()" + [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))]) + (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG))) + (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])] + "") + +;; Convert imul by three, five and nine into lea +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9" + [(set (match_dup 0) + (plus:SI (mult:SI (match_dup 1) (match_dup 2)) + (match_dup 1)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + +(define_peephole2 + [(parallel + [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "optimize_insn_for_speed_p () + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 0) (match_dup 2)) + (match_dup 0)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + +(define_peephole2 + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI 
(match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) + (plus:DI (mult:DI (match_dup 1) (match_dup 2)) + (match_dup 1)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + +(define_peephole2 + [(parallel + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_64BIT + && optimize_insn_for_speed_p () + && (INTVAL (operands[2]) == 3 + || INTVAL (operands[2]) == 5 + || INTVAL (operands[2]) == 9)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 0) (match_dup 2)) + (match_dup 0)))] + { operands[2] = GEN_INT (INTVAL (operands[2]) - 1); }) + +;; Imul $32bit_imm, mem, reg is vector decoded, while +;; imul $32bit_imm, reg, reg is direct decoded. +(define_peephole2 + [(match_scratch:DI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "memory_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +"") + +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +"") + +(define_peephole2 + [(match_scratch:SI 3 "r") + (parallel [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (mult:SI (match_operand:SI 1 "memory_operand" "") + (match_operand:SI 2 "immediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p () + && !satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 1)) + (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])] +"") + +;; imul $8/16bit_imm, regmem, reg is vector decoded. +;; Convert it into imul reg, reg +;; It would be better to force assembler to encode instruction using long +;; immediate, but there is apparently no way to do so. 
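+;; For example, on such CPUs (with %ecx as the scratch register):
+;;   imull $11, %edi, %eax   ->   movl $11, %ecx ; movl %edi, %eax ;
+;;                                imull %ecx, %eax
+;; so the multiply itself is direct decoded.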
+(define_peephole2 + [(parallel [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "nonimmediate_operand" "") + (match_operand:DI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:DI 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +(define_peephole2 + [(parallel [(set (match_operand:SI 0 "register_operand" "") + (mult:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:SI 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:SI 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p () + && satisfies_constraint_K (operands[2])" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +(define_peephole2 + [(parallel [(set (match_operand:HI 0 "register_operand" "") + (mult:HI (match_operand:HI 1 "nonimmediate_operand" "") + (match_operand:HI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (match_scratch:HI 3 "r")] + "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()" + [(set (match_dup 3) (match_dup 2)) + (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] +{ + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); +}) + +;; After splitting up read-modify operations, array accesses with memory +;; operands might end up in form: +;; sall $2, %eax +;; movl 4(%esp), %edx +;; addl %edx, %eax +;; instead of pre-splitting: +;; sall $2, %eax +;; addl 4(%esp), %eax +;; Turn it into: +;; movl 4(%esp), %edx +;; leal (%edx,%eax,4), %eax + +(define_peephole2 + [(parallel [(set (match_operand 0 "register_operand" "") + (ashift (match_operand 1 "register_operand" "") + (match_operand 2 "const_int_operand" ""))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_operand 3 "register_operand") + (match_operand 4 "x86_64_general_operand" "")) + (parallel [(set (match_operand 5 "register_operand" "") + (plus (match_operand 6 "register_operand" "") + (match_operand 7 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))])] + "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3 + /* Validate MODE for lea. */ + && ((!TARGET_PARTIAL_REG_STALL + && (GET_MODE (operands[0]) == QImode + || GET_MODE (operands[0]) == HImode)) + || GET_MODE (operands[0]) == SImode + || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)) + /* We reorder load and the shift. */ + && !rtx_equal_p (operands[1], operands[3]) + && !reg_overlap_mentioned_p (operands[0], operands[4]) + /* Last PLUS must consist of operand 0 and 3. */ + && !rtx_equal_p (operands[0], operands[3]) + && (rtx_equal_p (operands[3], operands[6]) + || rtx_equal_p (operands[3], operands[7])) + && (rtx_equal_p (operands[0], operands[6]) + || rtx_equal_p (operands[0], operands[7])) + /* The intermediate operand 0 must die or be same as output. */ + && (rtx_equal_p (operands[0], operands[5]) + || peep2_reg_dead_p (3, operands[0]))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (match_dup 1))] +{ + enum machine_mode mode = GET_MODE (operands[5]) == DImode ? 
DImode : SImode; + int scale = 1 << INTVAL (operands[2]); + rtx index = gen_lowpart (Pmode, operands[1]); + rtx base = gen_lowpart (Pmode, operands[3]); + rtx dest = gen_lowpart (mode, operands[5]); + + operands[1] = gen_rtx_PLUS (Pmode, base, + gen_rtx_MULT (Pmode, index, GEN_INT (scale))); + if (mode != Pmode) + operands[1] = gen_rtx_SUBREG (mode, operands[1], 0); + operands[0] = dest; +}) + +;; Call-value patterns last so that the wildcard operand does not +;; disrupt insn-recog's switch tables. + +(define_insn "*call_value_pop_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_pop_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand:SI 3 "immediate_operand" "i,i")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P1 + jmp\t%A1" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "constant_call_address_operand" "")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" "")))] + "TARGET_64BIT" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_0_rex64_ms_sysv" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" "")) + (match_operand:DI 2 "const_int_operand" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI XMM6_REG)) + (clobber (reg:TI XMM7_REG)) + (clobber (reg:TI XMM8_REG)) + (clobber (reg:TI XMM9_REG)) + (clobber (reg:TI XMM10_REG)) + (clobber (reg:TI XMM11_REG)) + (clobber (reg:TI XMM12_REG)) + (clobber (reg:TI XMM13_REG)) + (clobber (reg:TI XMM14_REG)) + (clobber (reg:TI XMM15_REG)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (SIBLING_CALL_P (insn)) + return "jmp\t%P1"; + else + return "call\t%P1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT && !SIBLING_CALL_P (insn)" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1" + [(set (match_operand 0 "" "") + (call (mem:QI 
(match_operand:SI 1 "sibcall_insn_operand" "s,U")) + (match_operand:SI 2 "" "")))] + "!TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P1 + jmp\t%A1" + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && !SIBLING_CALL_P (insn) + && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64_ms_sysv" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm")) + (match_operand:DI 2 "" ""))) + (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) + (clobber (reg:TI 27)) + (clobber (reg:TI 28)) + (clobber (reg:TI 45)) + (clobber (reg:TI 46)) + (clobber (reg:TI 47)) + (clobber (reg:TI 48)) + (clobber (reg:TI 49)) + (clobber (reg:TI 50)) + (clobber (reg:TI 51)) + (clobber (reg:TI 52)) + (clobber (reg:DI SI_REG)) + (clobber (reg:DI DI_REG))] + "!SIBLING_CALL_P (insn) && TARGET_64BIT" +{ + if (constant_call_address_operand (operands[1], Pmode)) + return "call\t%P1"; + return "call\t%A1"; +} + [(set_attr "type" "callv")]) + +(define_insn "*call_value_1_rex64_large" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && !SIBLING_CALL_P (insn)" + "call\t%A1" + [(set_attr "type" "callv")]) + +(define_insn "*sibcall_value_1_rex64" + [(set (match_operand 0 "" "") + (call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U")) + (match_operand:DI 2 "" "")))] + "TARGET_64BIT && SIBLING_CALL_P (insn)" + "@ + jmp\t%P1 + jmp\t%A1" + [(set_attr "type" "callv")]) + +;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5. +;; That, however, is usually mapped by the OS to SIGSEGV, which is often +;; caught for use by garbage collectors and the like. Using an insn that +;; maps to SIGILL makes it more likely the program will rightfully die. +;; Keeping with tradition, "6" is in honor of #UD. +(define_insn "trap" + [(trap_if (const_int 1) (const_int 6))] + "" + { return ASM_SHORT "0x0b0f"; } + [(set_attr "length" "2")]) + +(define_expand "sse_prologue_save" + [(parallel [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "")) + (use (match_operand:DI 2 "immediate_operand" "")) + (use (label_ref:DI (match_operand 3 "" "")))])] + "TARGET_64BIT" + "") + +(define_insn "*sse_prologue_save_insn" + [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R") + (match_operand:DI 4 "const_int_operand" "n"))) + (unspec:BLK [(reg:DI 21) + (reg:DI 22) + (reg:DI 23) + (reg:DI 24) + (reg:DI 25) + (reg:DI 26) + (reg:DI 27) + (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE)) + (use (match_operand:DI 1 "register_operand" "r")) + (use (match_operand:DI 2 "const_int_operand" "i")) + (use (label_ref:DI (match_operand 3 "" "X")))] + "TARGET_64BIT + && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128 + && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128" +{ + int i; + operands[0] = gen_rtx_MEM (Pmode, + gen_rtx_PLUS (Pmode, operands[0], operands[4])); + /* VEX instruction with a REX prefix will #UD. 
*/ + if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS) + gcc_unreachable (); + + output_asm_insn ("jmp\t%A1", operands); + for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--) + { + operands[4] = adjust_address (operands[0], DImode, i*16); + operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i)); + PUT_MODE (operands[4], TImode); + if (GET_CODE (XEXP (operands[0], 0)) != PLUS) + output_asm_insn ("rex", operands); + output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands); + } + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[3])); + return ""; +} + [(set_attr "type" "other") + (set_attr "length_immediate" "0") + (set_attr "length_address" "0") + (set (attr "length") + (if_then_else + (eq (symbol_ref "TARGET_AVX") (const_int 0)) + (const_string "34") + (const_string "42"))) + (set_attr "memory" "store") + (set_attr "modrm" "0") + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "DI")]) + +(define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE || TARGET_3DNOW" +{ + int rw = INTVAL (operands[1]); + int locality = INTVAL (operands[2]); + + gcc_assert (rw == 0 || rw == 1); + gcc_assert (locality >= 0 && locality <= 3); + gcc_assert (GET_MODE (operands[0]) == Pmode + || GET_MODE (operands[0]) == VOIDmode); + + /* Use 3dNOW prefetch in case we are asking for write prefetch not + supported by SSE counterpart or the SSE prefetch is not available + (K6 machines). Otherwise use SSE prefetch as it allows specifying + of locality. */ + if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw)) + operands[2] = GEN_INT (3); + else + operands[1] = const0_rtx; +}) + +(define_insn "*prefetch_sse" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && !TARGET_64BIT" +{ + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; + + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); + + return patterns[locality]; +} + [(set_attr "type" "sse") + (set_attr "memory" "none")]) + +(define_insn "*prefetch_sse_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (const_int 0) + (match_operand:SI 1 "const_int_operand" ""))] + "TARGET_PREFETCH_SSE && TARGET_64BIT" +{ + static const char * const patterns[4] = { + "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0" + }; + + int locality = INTVAL (operands[1]); + gcc_assert (locality >= 0 && locality <= 3); + + return patterns[locality]; +} + [(set_attr "type" "sse") + (set_attr "memory" "none")]) + +(define_insn "*prefetch_3dnow" + [(prefetch (match_operand:SI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && !TARGET_64BIT" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +(define_insn "*prefetch_3dnow_rex" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (const_int 3))] + "TARGET_3DNOW && TARGET_64BIT" +{ + if (INTVAL (operands[1]) == 0) + return "prefetch\t%a0"; + else + return "prefetchw\t%a0"; +} + [(set_attr "type" "mmx") + (set_attr "memory" "none")]) + +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand" "") + 
(match_operand 1 "memory_operand" "")] + "" +{ +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_set_di (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_set_si (operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_set_di (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); +#endif + DONE; +}) + +(define_insn "stack_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) + +(define_insn "stack_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "" + "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_set_di" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + else + return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"; + } + [(set_attr "type" "multi")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG); + ix86_compare_op0 = operands[0]; + ix86_compare_op1 = operands[1]; + ix86_compare_emitted = flags; + +#ifdef TARGET_THREAD_SSP_OFFSET + if (TARGET_64BIT) + emit_insn (gen_stack_tls_protect_test_di (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); + else + emit_insn (gen_stack_tls_protect_test_si (flags, operands[0], + GEN_INT (TARGET_THREAD_SSP_OFFSET))); +#else + if (TARGET_64BIT) + emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1])); + else + emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1])); +#endif + emit_jump_insn (gen_beq (operands[2])); + DONE; +}) + +(define_insn "stack_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:SI 3 "=&r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" 
"") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_64BIT" + "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_si" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:SI 3 "=r"))] + "" + "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}" + [(set_attr "type" "multi")]) + +(define_insn "stack_tls_protect_test_di" + [(set (match_operand:CCZ 0 "flags_reg_operand" "") + (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "const_int_operand" "i")] + UNSPEC_SP_TLS_TEST)) + (clobber (match_scratch:DI 3 "=r"))] + "TARGET_64BIT" + { + /* The kernel uses a different segment register for performance reasons; a + system call would not have to trash the userspace segment register, + which would be expensive */ + if (ix86_cmodel != CM_KERNEL) + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}"; + else + return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}"; + } + [(set_attr "type" "multi")]) + +(define_mode_iterator CRC32MODE [QI HI SI]) +(define_mode_attr crc32modesuffix [(QI "{b}") (HI "{w}") (SI "{l}")]) +(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")]) + +(define_insn "sse4_2_crc32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI + [(match_operand:SI 1 "register_operand" "0") + (match_operand:CRC32MODE 2 "nonimmediate_operand" "")] + UNSPEC_CRC32))] + "TARGET_SSE4_2" + "crc32\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "SI")]) + +(define_insn "sse4_2_crc32di" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI + [(match_operand:DI 1 "register_operand" "0") + (match_operand:DI 2 "nonimmediate_operand" "rm")] + UNSPEC_CRC32))] + "TARGET_SSE4_2 && TARGET_64BIT" + "crc32{q}\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_rep" "1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "DI")]) + +(include "mmx.md") +(include "sse.md") +(include "sync.md") -- cgit v1.2.3