Diffstat (limited to 'gcc-4.9/gcc/config/sh/sh.md')
-rw-r--r-- | gcc-4.9/gcc/config/sh/sh.md | 15960
1 files changed, 15960 insertions, 0 deletions
diff --git a/gcc-4.9/gcc/config/sh/sh.md b/gcc-4.9/gcc/config/sh/sh.md new file mode 100644 index 000000000..ab1f0a51c --- /dev/null +++ b/gcc-4.9/gcc/config/sh/sh.md @@ -0,0 +1,15960 @@ +;;- Machine description for Renesas / SuperH SH. +;; Copyright (C) 1993-2014 Free Software Foundation, Inc. +;; Contributed by Steve Chamberlain (sac@cygnus.com). +;; Improved by Jim Wilson (wilson@cygnus.com). + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + + +;; ??? Should prepend a * to all pattern names which are not used. +;; This will make the compiler smaller, and rebuilds after changes faster. + +;; ??? Should be enhanced to include support for many more GNU superoptimizer +;; sequences. Especially the sequences for arithmetic right shifts. + +;; ??? Should check all DImode patterns for consistency and usefulness. + +;; ??? The MAC.W and MAC.L instructions are not supported. There is no +;; way to generate them. + +;; BSR is not generated by the compiler proper, but when relaxing, it +;; generates .uses pseudo-ops that allow linker relaxation to create +;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c + +;; Special constraints for SH machine description: +;; +;; t -- T +;; x -- mac +;; l -- pr +;; z -- r0 +;; +;; Special formats used for outputting SH instructions: +;; +;; %. -- print a .s if insn needs delay slot +;; %@ -- print rte/rts if is/isn't an interrupt function +;; %# -- output a nop if there is nothing to put in the delay slot +;; %O -- print a constant without the # +;; %R -- print the lsw reg of a double +;; %S -- print the msw reg of a double +;; %T -- print next word of a double REG or MEM +;; +;; Special predicates: +;; +;; arith_operand -- operand is valid source for arithmetic op +;; arith_reg_operand -- operand is valid register for arithmetic op +;; general_movdst_operand -- operand is valid move destination +;; general_movsrc_operand -- operand is valid move source +;; logical_operand -- operand is valid source for logical op + +;; ------------------------------------------------------------------------- +;; Constants +;; ------------------------------------------------------------------------- + +(define_constants [ + (AP_REG 145) + (PR_REG 146) + (T_REG 147) + (GBR_REG 144) + (MACH_REG 148) + (MACL_REG 149) + (FPUL_REG 150) + (RAP_REG 152) + + (FPSCR_REG 151) + + (PIC_REG 12) + (FP_REG 14) + (SP_REG 15) + + (PR_MEDIA_REG 18) + (T_MEDIA_REG 19) + + (R0_REG 0) + (R1_REG 1) + (R2_REG 2) + (R3_REG 3) + (R4_REG 4) + (R5_REG 5) + (R6_REG 6) + (R7_REG 7) + (R8_REG 8) + (R9_REG 9) + (R10_REG 10) + (R20_REG 20) + (R21_REG 21) + (R22_REG 22) + (R23_REG 23) + + (DR0_REG 64) + (DR2_REG 66) + (DR4_REG 68) + (FR23_REG 87) + + (TR0_REG 128) + (TR1_REG 129) + (TR2_REG 130) + + (XD0_REG 136) + + ;; These are used with unspec. 
+ (UNSPEC_COMPACT_ARGS 0) + (UNSPEC_MOVA 1) + (UNSPEC_CASESI 2) + (UNSPEC_DATALABEL 3) + (UNSPEC_BBR 4) + (UNSPEC_SFUNC 5) + (UNSPEC_PIC 6) + (UNSPEC_GOT 7) + (UNSPEC_GOTOFF 8) + (UNSPEC_PLT 9) + (UNSPEC_CALLER 10) + (UNSPEC_GOTPLT 11) + (UNSPEC_ICACHE 12) + (UNSPEC_INIT_TRAMP 13) + (UNSPEC_FCOSA 14) + (UNSPEC_FSRRA 15) + (UNSPEC_FSINA 16) + (UNSPEC_NSB 17) + (UNSPEC_ALLOCO 18) + (UNSPEC_TLSGD 20) + (UNSPEC_TLSLDM 21) + (UNSPEC_TLSIE 22) + (UNSPEC_DTPOFF 23) + (UNSPEC_GOTTPOFF 24) + (UNSPEC_TPOFF 25) + (UNSPEC_RA 26) + (UNSPEC_DIV_INV_M0 30) + (UNSPEC_DIV_INV_M1 31) + (UNSPEC_DIV_INV_M2 32) + (UNSPEC_DIV_INV_M3 33) + (UNSPEC_DIV_INV20 34) + (UNSPEC_DIV_INV_TABLE 37) + (UNSPEC_ASHIFTRT 35) + (UNSPEC_THUNK 36) + (UNSPEC_CHKADD 38) + (UNSPEC_SP_SET 40) + (UNSPEC_SP_TEST 41) + (UNSPEC_MOVUA 42) + + ;; (unspec [VAL SHIFT] UNSPEC_EXTRACT_S16) computes (short) (VAL >> SHIFT). + ;; UNSPEC_EXTRACT_U16 is the unsigned equivalent. + (UNSPEC_EXTRACT_S16 43) + (UNSPEC_EXTRACT_U16 44) + + ;; (unspec [TARGET ANCHOR] UNSPEC_SYMOFF) == TARGET - ANCHOR. + (UNSPEC_SYMOFF 45) + + ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .). + (UNSPEC_PCREL_SYMOFF 46) + + ;; Misc builtins + (UNSPEC_BUILTIN_STRLEN 47) + + ;; These are used with unspec_volatile. + (UNSPECV_BLOCKAGE 0) + (UNSPECV_ALIGN 1) + (UNSPECV_CONST2 2) + (UNSPECV_CONST4 4) + (UNSPECV_CONST8 6) + (UNSPECV_WINDOW_END 10) + (UNSPECV_CONST_END 11) + (UNSPECV_EH_RETURN 12) + (UNSPECV_GBR 13) + (UNSPECV_SP_SWITCH_B 14) + (UNSPECV_SP_SWITCH_E 15) +]) + +;; ------------------------------------------------------------------------- +;; Attributes +;; ------------------------------------------------------------------------- + +;; Target CPU. + +(define_attr "cpu" + "sh1,sh2,sh2e,sh2a,sh3,sh3e,sh4,sh4a,sh5" + (const (symbol_ref "sh_cpu_attr"))) + +(define_attr "endian" "big,little" + (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN") + (const_string "little") (const_string "big")))) + +;; Indicate if the default fpu mode is single precision. +(define_attr "fpu_single" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FPU_SINGLE") + (const_string "yes") (const_string "no")))) + +(define_attr "fmovd" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FMOVD") + (const_string "yes") (const_string "no")))) +;; pipeline model +(define_attr "pipe_model" "sh1,sh4,sh5media" + (const + (cond [(symbol_ref "TARGET_SHMEDIA") (const_string "sh5media") + (symbol_ref "TARGET_SUPERSCALAR") (const_string "sh4")] + (const_string "sh1")))) + +;; cbranch conditional branch instructions +;; jump unconditional jumps +;; arith ordinary arithmetic +;; arith3 a compound insn that behaves similarly to a sequence of +;; three insns of type arith +;; arith3b like above, but might end with a redirected branch +;; load from memory +;; load_si Likewise, SImode variant for general register. +;; fload Likewise, but load to fp register. 
+;; store to memory +;; fstore floating point register to memory +;; move general purpose register to register +;; movi8 8-bit immediate to general purpose register +;; mt_group other sh4 mt instructions +;; fmove register to register, floating point +;; smpy word precision integer multiply +;; dmpy longword or doublelongword precision integer multiply +;; return rts +;; pload load of pr reg, which can't be put into delay slot of rts +;; prset copy register to pr reg, ditto +;; pstore store of pr reg, which can't be put into delay slot of jsr +;; prget copy pr to register, ditto +;; pcload pc relative load of constant value +;; pcfload Likewise, but load to fp register. +;; pcload_si Likewise, SImode variant for general register. +;; rte return from exception +;; sfunc special function call with known used registers +;; call function call +;; fp floating point +;; fpscr_toggle toggle a bit in the fpscr +;; fdiv floating point divide (or square root) +;; gp_fpul move from general purpose register to fpul +;; fpul_gp move from fpul to general purpose register +;; mac_gp move from mac[lh] to general purpose register +;; gp_mac move from general purpose register to mac[lh] +;; mac_mem move from mac[lh] to memory +;; mem_mac move from memory to mac[lh] +;; dfp_arith,dfp_mul, fp_cmp,dfp_cmp,dfp_conv +;; ftrc_s fix_truncsfsi2_i4 +;; dfdiv double precision floating point divide (or square root) +;; cwb ic_invalidate_line_i +;; movua SH4a unaligned load +;; fsrra square root reciprocal approximate +;; fsca sine and cosine approximate +;; tls_load load TLS related address +;; arith_media SHmedia arithmetic, logical, and shift instructions +;; cbranch_media SHmedia conditional branch instructions +;; cmp_media SHmedia compare instructions +;; dfdiv_media SHmedia double precision divide and square root +;; dfmul_media SHmedia double precision multiply instruction +;; dfparith_media SHmedia double precision floating point arithmetic +;; dfpconv_media SHmedia double precision floating point conversions +;; dmpy_media SHmedia longword multiply +;; fcmp_media SHmedia floating point compare instructions +;; fdiv_media SHmedia single precision divide and square root +;; fload_media SHmedia floating point register load instructions +;; fmove_media SHmedia floating point register moves (inc. fabs and fneg) +;; fparith_media SHmedia single precision floating point arithmetic +;; fpconv_media SHmedia single precision floating point conversions +;; fstore_media SHmedia floating point register store instructions +;; gettr_media SHmedia gettr instruction +;; invalidate_line_media SHmedia invalidate_line sequence +;; jump_media SHmedia unconditional branch instructions +;; load_media SHmedia general register load instructions +;; pt_media SHmedia pt instruction (expanded by assembler) +;; ptabs_media SHmedia ptabs instruction +;; store_media SHmedia general register store instructions +;; mcmp_media SHmedia multimedia compare, absolute, saturating ops +;; mac_media SHmedia mac-style fixed point operations +;; d2mpy_media SHmedia: two 32-bit integer multiplies +;; atrans_media SHmedia approximate transcendental functions +;; ustore_media SHmedia unaligned stores +;; nil no-op move, will be deleted. 
+ +(define_attr "type" + "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si, + fload,store,fstore,move,movi8,fmove,smpy,dmpy,return,pload,prset,pstore, + prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fpscr_toggle,fdiv,ftrc_s, + dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp, + gp_mac,mac_mem,mem_mac,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load, + arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media, + dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media, + fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media, + jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media, + d2mpy_media,atrans_media,ustore_media,nil,other" + (const_string "other")) + +;; We define a new attribute namely "insn_class".We use +;; this for the DFA based pipeline description. +;; +;; mt_group SH4 "mt" group instructions. +;; +;; ex_group SH4 "ex" group instructions. +;; +;; ls_group SH4 "ls" group instructions. +;; +(define_attr "insn_class" + "mt_group,ex_group,ls_group,br_group,fe_group,co_group,none" + (cond [(eq_attr "type" "move,mt_group") (const_string "mt_group") + (eq_attr "type" "movi8,arith,dyn_shift") (const_string "ex_group") + (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload, + store,fstore,gp_fpul,fpul_gp") (const_string "ls_group") + (eq_attr "type" "cbranch,jump") (const_string "br_group") + (eq_attr "type" "fp,fp_cmp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv") + (const_string "fe_group") + (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore, + prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb, + gp_mac,mac_mem,mem_mac") (const_string "co_group")] + (const_string "none"))) + +;; nil are zero instructions, and arith3 / arith3b are multiple instructions, +;; so these do not belong in an insn group, although they are modeled +;; with their own define_insn_reservations. + +;; Indicate what precision must be selected in fpscr for this insn, if any. +(define_attr "fp_mode" "single,double,none" (const_string "none")) + +;; Indicate if the fpu mode is set by this instruction +;; "unknown" must have the value as "none" in fp_mode, and means +;; that the instruction/abi has left the processor in an unknown +;; state. +;; "none" means that nothing has changed and no mode is set. +;; This attribute is only used for the Renesas ABI. +(define_attr "fp_set" "single,double,unknown,none" (const_string "none")) + +; If a conditional branch destination is within -252..258 bytes away +; from the instruction it can be 2 bytes long. Something in the +; range -4090..4100 bytes can be 6 bytes long. All other conditional +; branches are initially assumed to be 16 bytes long. +; In machine_dependent_reorg, we split all branches that are longer than +; 2 bytes. + +;; The maximum range used for SImode constant pool entries is 1018. A final +;; instruction can add 8 bytes while only being 4 bytes in size, thus we +;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch +;; instruction around the pool table, 2 bytes of alignment before the table, +;; and 30 bytes of alignment after the table. That gives a maximum total +;; pool size of 1058 bytes. +;; Worst case code/pool content size ratio is 1:2 (using asms). +;; Thus, in the worst case, there is one instruction in front of a maximum +;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of +;; code. For the last n bytes of code, there are 2n + 36 bytes of pool. 
+;; If we have a forward branch, the initial table will be put after the +;; unconditional branch. +;; +;; ??? We could do much better by keeping track of the actual pcloads within +;; the branch range and in the pcload range in front of the branch range. + +;; ??? This looks ugly because genattrtab won't allow if_then_else or cond +;; inside an le. +(define_attr "short_cbranch_p" "no,yes" + (cond [(match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506)) + (const_string "yes") + (match_test "NEXT_INSN (PREV_INSN (insn)) != insn") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_branch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990)) + (const_int 1988)) + (const_string "yes") + (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4092)) + (const_int 8186)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "med_cbranch_p" "no,yes" + (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988)) + (const_int 1986)) + (const_string "yes") + (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 4090)) + (const_int 8184)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_branch_p" "no,yes" + (cond [(match_test "! TARGET_SH2") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10330)) + (const_int 20660)) + (const_string "yes") + (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32764)) + (const_int 65530)) + (const_string "yes") + ] (const_string "no"))) + +(define_attr "braf_cbranch_p" "no,yes" + (cond [(match_test "! TARGET_SH2") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 10328)) + (const_int 20658)) + (const_string "yes") + (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD") + (const_string "no") + (leu (plus (minus (match_dup 0) (pc)) (const_int 32762)) + (const_int 65528)) + (const_string "yes") + ] (const_string "no"))) + +;; An unconditional jump in the range -4092..4098 can be 2 bytes long. +;; For wider ranges, we need a combination of a code and a data part. +;; If we can get a scratch register for a long range jump, the code +;; part can be 4 bytes long; otherwise, it must be 8 bytes long. +;; If the jump is in the range -32764..32770, the data part can be 2 bytes +;; long; otherwise, it must be 6 bytes long. + +;; All other instructions are two bytes long by default. + +;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)), +;; but getattrtab doesn't understand this. +(define_attr "length" "" + (cond [(eq_attr "type" "cbranch") + (cond [(eq_attr "short_cbranch_p" "yes") + (const_int 2) + (eq_attr "med_cbranch_p" "yes") + (const_int 6) + (eq_attr "braf_cbranch_p" "yes") + (const_int 12) +;; ??? using pc is not computed transitively. 
+ (ne (match_dup 0) (match_dup 0)) + (const_int 14) + (match_test "flag_pic") + (const_int 24) + ] (const_int 16)) + (eq_attr "type" "jump") + (cond [(eq_attr "med_branch_p" "yes") + (const_int 2) + (and (match_test "prev_nonnote_insn (insn)") + (and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))") + (symbol_ref "INSN")) + (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))") + (symbol_ref "code_for_indirect_jump_scratch")))) + (cond [(eq_attr "braf_branch_p" "yes") + (const_int 6) + (not (match_test "flag_pic")) + (const_int 10) + (match_test "TARGET_SH2") + (const_int 10)] (const_int 18)) + (eq_attr "braf_branch_p" "yes") + (const_int 10) +;; ??? using pc is not computed transitively. + (ne (match_dup 0) (match_dup 0)) + (const_int 12) + (match_test "flag_pic") + (const_int 22) + ] (const_int 14)) + (eq_attr "type" "pt_media") + (if_then_else (match_test "TARGET_SHMEDIA64") + (const_int 20) (const_int 12)) + (and (eq_attr "type" "jump_media") + (match_test "TARGET_SH5_CUT2_WORKAROUND")) + (const_int 8) + ] (if_then_else (match_test "TARGET_SHMEDIA") + (const_int 4) + (const_int 2)))) + +;; DFA descriptions for the pipelines + +(include "sh1.md") +(include "shmedia.md") +(include "sh4.md") + +(include "iterators.md") +(include "predicates.md") +(include "constraints.md") + +;; Definitions for filling delay slots + +(define_attr "needs_delay_slot" "yes,no" (const_string "no")) + +(define_attr "banked" "yes,no" + (cond [(match_test "sh_loads_bankedreg_p (insn)") + (const_string "yes")] + (const_string "no"))) + +;; ??? This should be (nil) instead of (const_int 0) +(define_attr "hit_stack" "yes,no" + (cond [(not (match_test "find_regno_note (insn, REG_INC, SP_REG)")) + (const_string "no")] + (const_string "yes"))) + +(define_attr "interrupt_function" "no,yes" + (const (symbol_ref "current_function_interrupt"))) + +(define_attr "in_delay_slot" "yes,no" + (cond [(eq_attr "type" "cbranch") (const_string "no") + (eq_attr "type" "pcload,pcload_si") (const_string "no") + (eq_attr "needs_delay_slot" "yes") (const_string "no") + (eq_attr "length" "2") (const_string "yes") + ] (const_string "no"))) + +(define_attr "cond_delay_slot" "yes,no" + (cond [(eq_attr "in_delay_slot" "yes") (const_string "yes") + ] (const_string "no"))) + +(define_attr "is_sfunc" "" + (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0))) + +(define_attr "is_mac_media" "" + (if_then_else (eq_attr "type" "mac_media") (const_int 1) (const_int 0))) + +(define_attr "branch_zero" "yes,no" + (cond [(eq_attr "type" "!cbranch") (const_string "no") + (ne (symbol_ref "(next_active_insn (insn)\ + == (prev_active_insn\ + (XEXP (SET_SRC (PATTERN (insn)), 1))))\ + && get_attr_length (next_active_insn (insn)) == 2") + (const_int 0)) + (const_string "yes")] + (const_string "no"))) + +;; SH4 Double-precision computation with double-precision result - +;; the two halves are ready at different times. +(define_attr "dfp_comp" "yes,no" + (cond [(eq_attr "type" "dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")] + (const_string "no"))) + +;; Insns for which the latency of a preceding fp insn is decreased by one. +(define_attr "late_fp_use" "yes,no" (const_string "no")) +;; And feeding insns for which this relevant. 
+(define_attr "any_fp_comp" "yes,no" + (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv") + (const_string "yes")] + (const_string "no"))) + +(define_attr "any_int_load" "yes,no" + (cond [(eq_attr "type" "load,load_si,pcload,pcload_si") + (const_string "yes")] + (const_string "no"))) + +(define_attr "highpart" "user, ignore, extend, depend, must_split" + (const_string "user")) + +(define_delay + (eq_attr "needs_delay_slot" "yes") + [(eq_attr "in_delay_slot" "yes") (nil) (nil)]) + +;; Since a normal return (rts) implicitly uses the PR register, +;; we can't allow PR register loads in an rts delay slot. +;; On the SH1* and SH2*, the rte instruction reads the return pc from the +;; stack, and thus we can't put a pop instruction in its delay slot. +;; On the SH3* and SH4*, the rte instruction does not use the stack, so a +;; pop instruction can go in the delay slot, unless it references a banked +;; register (the register bank is switched by rte). +(define_delay + (eq_attr "type" "return") + [(and (eq_attr "in_delay_slot" "yes") + (ior (and (eq_attr "interrupt_function" "no") + (eq_attr "type" "!pload,prset")) + (and (eq_attr "interrupt_function" "yes") + (ior (match_test "TARGET_SH3") (eq_attr "hit_stack" "no")) + (eq_attr "banked" "no")))) + (nil) (nil)]) + +;; Since a call implicitly uses the PR register, we can't allow +;; a PR register store in a jsr delay slot. + +(define_delay + (ior (eq_attr "type" "call") (eq_attr "type" "sfunc")) + [(and (eq_attr "in_delay_slot" "yes") + (eq_attr "type" "!pstore,prget")) (nil) (nil)]) + +;; Say that we have annulled true branches, since this gives smaller and +;; faster code when branches are predicted as not taken. + +;; ??? The non-annulled condition should really be "in_delay_slot", +;; but insns that can be filled in non-annulled get priority over insns +;; that can only be filled in anulled. + +(define_delay + (and (eq_attr "type" "cbranch") + (match_test "TARGET_SH2")) + ;; SH2e has a hardware bug that pretty much prohibits the use of + ;; annulled delay slots. + [(eq_attr "cond_delay_slot" "yes") (and (eq_attr "cond_delay_slot" "yes") + (not (eq_attr "cpu" "sh2e"))) (nil)]) + +;; ------------------------------------------------------------------------- +;; SImode signed integer comparisons +;; ------------------------------------------------------------------------- + +;; Various patterns to generate the TST #imm, R0 instruction. +;; Although this adds some pressure on the R0 register, it can potentially +;; result in faster code, even if the operand has to be moved to R0 first. +;; This is because on SH4 TST #imm, R0 and MOV Rm, Rn are both MT group +;; instructions and thus will be executed in parallel. On SH4A TST #imm, R0 +;; is an EX group instruction but still can be executed in parallel with the +;; MT group MOV Rm, Rn instruction. + +;; Usual TST #imm, R0 patterns for SI, HI and QI +;; This is usually used for bit patterns other than contiguous bits +;; and single bits. 
+(define_insn "tstsi_t" + [(set (reg:SI T_REG) + (eq:SI (and:SI (match_operand:SI 0 "logical_operand" "%z,r") + (match_operand:SI 1 "logical_operand" "K08,r")) + (const_int 0)))] + "TARGET_SH1" + "tst %1,%0" + [(set_attr "type" "mt_group")]) + +(define_insn "tsthi_t" + [(set (reg:SI T_REG) + (eq:SI (subreg:SI (and:HI (match_operand:HI 0 "logical_operand" "%z") + (match_operand 1 "const_int_operand")) 0) + (const_int 0)))] + "TARGET_SH1 + && CONST_OK_FOR_K08 (INTVAL (operands[1]))" + "tst %1,%0" + [(set_attr "type" "mt_group")]) + +(define_insn "tstqi_t" + [(set (reg:SI T_REG) + (eq:SI (subreg:SI (and:QI (match_operand:QI 0 "logical_operand" "%z") + (match_operand 1 "const_int_operand")) 0) + (const_int 0)))] + "TARGET_SH1 + && (CONST_OK_FOR_K08 (INTVAL (operands[1])) + || CONST_OK_FOR_I08 (INTVAL (operands[1])))" +{ + operands[1] = GEN_INT (INTVAL (operands[1]) & 255); + return "tst %1,%0"; +} + [(set_attr "type" "mt_group")]) + +;; Test low QI subreg against zero. +;; This avoids unnecessary zero extension before the test. +(define_insn "*tstqi_t_zero" + [(set (reg:SI T_REG) + (eq:SI (match_operand:QI 0 "logical_operand" "z") (const_int 0)))] + "TARGET_SH1" + "tst #255,%0" + [(set_attr "type" "mt_group")]) + +;; This pattern might be risky because it also tests the upper bits and not +;; only the subreg. However, it seems that combine will get to this only +;; when testing sign/zero extended values. In this case the extended upper +;; bits do not matter. +(define_insn "*tst<mode>_t_zero" + [(set (reg:SI T_REG) + (eq:SI + (subreg:QIHI + (and:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "r")) <lowpart_le>) + (const_int 0)))] + "TARGET_SH1 && TARGET_LITTLE_ENDIAN" + "tst %0,%1" + [(set_attr "type" "mt_group")]) + +(define_insn "*tst<mode>_t_zero" + [(set (reg:SI T_REG) + (eq:SI + (subreg:QIHI + (and:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "r")) <lowpart_be>) + (const_int 0)))] + "TARGET_SH1 && TARGET_BIG_ENDIAN" + "tst %0,%1" + [(set_attr "type" "mt_group")]) + +;; Extract LSB, negate and store in T bit. +(define_insn "tstsi_t_and_not" + [(set (reg:SI T_REG) + (and:SI (not:SI (match_operand:SI 0 "logical_operand" "z")) + (const_int 1)))] + "TARGET_SH1" + "tst #1,%0" + [(set_attr "type" "mt_group")]) + +;; Extract contiguous bits and compare them against zero. +(define_insn "tst<mode>_t_zero_extract_eq" + [(set (reg:SI T_REG) + (eq:SI (zero_extract:SI (match_operand:QIHISIDI 0 "logical_operand" "z") + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand")) + (const_int 0)))] + "TARGET_SH1 + && CONST_OK_FOR_K08 (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]))" +{ + operands[1] = GEN_INT (ZERO_EXTRACT_ANDMASK (operands[1], operands[2])); + return "tst %1,%0"; +} + [(set_attr "type" "mt_group")]) + +;; This split is required when testing bits in a QI subreg. 
+(define_split + [(set (reg:SI T_REG) + (eq:SI + (if_then_else:SI + (zero_extract:SI (match_operand 0 "logical_operand") + (match_operand 1 "const_int_operand") + (match_operand 2 "const_int_operand")) + (match_operand 3 "const_int_operand") + (const_int 0)) + (const_int 0)))] + "TARGET_SH1 + && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3]) + && CONST_OK_FOR_K08 (INTVAL (operands[3]))" + [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 3)) + (const_int 0)))] +{ + if (GET_MODE (operands[0]) == QImode) + operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0); +}) + +;; Extract single bit, negate and store it in the T bit. +;; Not used for SH4A. +(define_insn "tstsi_t_zero_extract_xor" + [(set (reg:SI T_REG) + (zero_extract:SI (xor:SI (match_operand:SI 0 "logical_operand" "z") + (match_operand:SI 3 "const_int_operand")) + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_SH1 + && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3]) + && CONST_OK_FOR_K08 (INTVAL (operands[3]))" + "tst %3,%0" + [(set_attr "type" "mt_group")]) + +;; Extract single bit, negate and store it in the T bit. +;; Used for SH4A little endian. +(define_insn "tstsi_t_zero_extract_subreg_xor_little" + [(set (reg:SI T_REG) + (zero_extract:SI + (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z") + (match_operand:SI 3 "const_int_operand")) 0) + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_SH1 && TARGET_LITTLE_ENDIAN + && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) + == (INTVAL (operands[3]) & 255) + && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) & 255); + return "tst %3,%0"; +} + [(set_attr "type" "mt_group")]) + +;; Extract single bit, negate and store it in the T bit. +;; Used for SH4A big endian. +(define_insn "tstsi_t_zero_extract_subreg_xor_big" + [(set (reg:SI T_REG) + (zero_extract:SI + (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z") + (match_operand:SI 3 "const_int_operand")) 3) + (match_operand:SI 1 "const_int_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_SH1 && TARGET_BIG_ENDIAN + && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) + == (INTVAL (operands[3]) & 255) + && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) & 255); + return "tst %3,%0"; +} + [(set_attr "type" "mt_group")]) + +(define_insn "cmpeqsi_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r") + (match_operand:SI 1 "arith_operand" "N,rI08,r")))] + "TARGET_SH1" + "@ + tst %0,%0 + cmp/eq %1,%0 + cmp/eq %1,%0" + [(set_attr "type" "mt_group")]) + +;; FIXME: For some reason, on SH4A and SH2A combine fails to simplify this +;; pattern by itself. What this actually does is: +;; x == 0: (1 >> 0-0) & 1 = 1 +;; x != 0: (1 >> 0-x) & 1 = 0 +;; Without this the test pr51244-8.c fails on SH2A and SH4A. 
+(define_insn_and_split "*cmpeqsi_t" + [(set (reg:SI T_REG) + (and:SI (lshiftrt:SI + (const_int 1) + (neg:SI (match_operand:SI 0 "arith_reg_operand" "r"))) + (const_int 1)))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))]) + +(define_insn "cmpgtsi_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "N,r")))] + "TARGET_SH1" + "@ + cmp/pl %0 + cmp/gt %1,%0" + [(set_attr "type" "mt_group")]) + +(define_insn "cmpgesi_t" + [(set (reg:SI T_REG) + (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r") + (match_operand:SI 1 "arith_reg_or_0_operand" "N,r")))] + "TARGET_SH1" + "@ + cmp/pz %0 + cmp/ge %1,%0" + [(set_attr "type" "mt_group")]) + +;; FIXME: This is actually wrong. There is no way to literally move a +;; general reg to t reg. Luckily, it seems that this pattern will be only +;; used when the general reg is known be either '0' or '1' during combine. +;; What we actually need is reg != 0 -> T, but we have only reg == 0 -> T. +;; Due to interactions with other patterns, combine fails to pick the latter +;; and invert the dependent logic. +(define_insn "*negtstsi" + [(set (reg:SI T_REG) (match_operand:SI 0 "arith_reg_operand" "r"))] + "TARGET_SH1" + "cmp/pl %0" + [(set_attr "type" "mt_group")]) + +;; Some integer sign comparison patterns can be realized with the div0s insn. +;; div0s Rm,Rn T = (Rm >> 31) ^ (Rn >> 31) +(define_insn "cmp_div0s_0" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "r")) + (const_int 31)))] + "TARGET_SH1" + "div0s %0,%1" + [(set_attr "type" "arith")]) + +(define_insn "cmp_div0s_1" + [(set (reg:SI T_REG) + (lt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "r")) + (const_int 0)))] + "TARGET_SH1" + "div0s %0,%1" + [(set_attr "type" "arith")]) + +(define_insn_and_split "*cmp_div0s_0" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (lshiftrt:SI (xor:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 31))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) + (lshiftrt:SI (xor:SI (match_dup 1) (match_dup 2)) (const_int 31))) + (set (match_dup 0) (reg:SI T_REG))]) + +(define_insn "*cmp_div0s_0" + [(set (reg:SI T_REG) + (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 31)) + (ge:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 0))))] + "TARGET_SH1" + "div0s %0,%1" + [(set_attr "type" "arith")]) + +(define_insn_and_split "*cmp_div0s_1" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (ge:SI (xor:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 0))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +;; We have to go through the movnegt expander here which will handle the +;; SH2A vs non-SH2A cases. 
+{ + emit_insn (gen_cmp_div0s_1 (operands[1], operands[2])); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); + DONE; +}) + +(define_insn_and_split "*cmp_div0s_1" + [(set (reg:SI T_REG) + (ge:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")) + (const_int 0)))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 0) (match_dup 1)) + (const_int 0))) + (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))]) + +(define_insn_and_split "*cmp_div0s_1" + [(set (reg:SI T_REG) + (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand") + (const_int 31)) + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 31))))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 0) (match_dup 1)) + (const_int 0))) + (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))]) + +;; ------------------------------------------------------------------------- +;; SImode compare and branch +;; ------------------------------------------------------------------------- + +(define_expand "cbranchsi4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (reg:SI T_REG))] + "" +{ + if (TARGET_SHMEDIA) + emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1], + operands[2], operands[3])); + else + expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1); + + DONE; +}) + +;; Combine patterns to invert compare and branch operations for which we +;; don't have actual comparison insns. These patterns are used in cases +;; which appear after the initial cbranchsi expansion, which also does +;; some condition inversion. +(define_split + [(set (pc) + (if_then_else (ne (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_or_0_operand" "")) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(set (reg:SI T_REG) (eq:SI (match_dup 0) (match_dup 1))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +;; FIXME: Similar to the *cmpeqsi_t pattern above, for some reason, on SH4A +;; and SH2A combine fails to simplify this pattern by itself. +;; What this actually does is: +;; x == 0: (1 >> 0-0) & 1 = 1 +;; x != 0: (1 >> 0-x) & 1 = 0 +;; Without this the test pr51244-8.c fails on SH2A and SH4A. +(define_split + [(set (pc) + (if_then_else + (eq (and:SI (lshiftrt:SI + (const_int 1) + (neg:SI (match_operand:SI 0 "arith_reg_operand" ""))) + (const_int 1)) + (const_int 0)) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +;; FIXME: These could probably use code iterators for the compare op. 
+(define_split + [(set (pc) + (if_then_else (le (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_or_0_operand" "")) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(set (reg:SI T_REG) (gt:SI (match_dup 0) (match_dup 1))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +(define_split + [(set (pc) + (if_then_else (lt (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_or_0_operand" "")) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(set (reg:SI T_REG) (ge:SI (match_dup 0) (match_dup 1))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +(define_split + [(set (pc) + (if_then_else (leu (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(set (reg:SI T_REG) (gtu:SI (match_dup 0) (match_dup 1))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +(define_split + [(set (pc) + (if_then_else (ltu (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(set (reg:SI T_REG) (geu:SI (match_dup 0) (match_dup 1))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +;; Compare and branch combine patterns for div0s comparisons. +(define_insn_and_split "*cbranch_div0s" + [(set (pc) + (if_then_else (lt (xor:SI (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")) + (const_int 0)) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) + (lt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 0))) + (set (pc) + (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +(define_insn_and_split "*cbranch_div0s" + [(set (pc) + (if_then_else (ge (xor:SI (match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")) + (const_int 0)) + (label_ref (match_operand 2)) + (pc))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) + (lt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 0))) + (set (pc) + (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 2)) + (pc)))]) + +;; Conditional move combine pattern for div0s comparisons. +;; This is used when TARGET_PRETEND_CMOVE is in effect. 
+(define_insn_and_split "*movsicc_div0s" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (if_then_else:SI (ge (xor:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 0)) + (match_operand:SI 3 "arith_reg_operand" "") + (match_operand:SI 4 "general_movsrc_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_PRETEND_CMOVE" + "#" + "&& 1" + [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (match_dup 4) + (match_dup 3)))]) + +(define_insn_and_split "*movsicc_div0s" + [(set (match_operand:SI 0 "arith_reg_dest") + (if_then_else:SI (eq (lshiftrt:SI + (match_operand:SI 1 "arith_reg_operand") + (const_int 31)) + (lshiftrt:SI + (match_operand:SI 2 "arith_reg_operand") + (const_int 31))) + (match_operand:SI 3 "arith_reg_operand") + (match_operand:SI 4 "general_movsrc_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_PRETEND_CMOVE" + "#" + "&& 1" + [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 1) (match_dup 2)) + (const_int 0))) + (set (match_dup 0) + (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (match_dup 4) + (match_dup 3)))]) + +;; ------------------------------------------------------------------------- +;; SImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +;; Usually comparisons of 'unsigned int >= 0' are optimized away completely. +;; However, especially when optimizations are off (e.g. -O0) such comparisons +;; might remain and we have to handle them. If the '>= 0' case wasn't +;; handled here, something else would just load a '0' into the second operand +;; and do the comparison. We can do slightly better by just setting the +;; T bit to '1'. +(define_insn_and_split "cmpgeusi_t" + [(set (reg:SI T_REG) + (geu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_or_0_operand" "r")))] + "TARGET_SH1" + "cmp/hs %1,%0" + "&& satisfies_constraint_Z (operands[1])" + [(set (reg:SI T_REG) (const_int 1))] + "" + [(set_attr "type" "mt_group")]) + +(define_insn "cmpgtusi_t" + [(set (reg:SI T_REG) + (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "cmp/hi %1,%0" + [(set_attr "type" "mt_group")]) + +;; ------------------------------------------------------------------------- +;; DImode compare and branch +;; ------------------------------------------------------------------------- + +;; arith3 patterns don't work well with the sh4-300 branch prediction mechanism. +;; Therefore, we aim to have a set of three branches that go straight to the +;; destination, i.e. only one of them is taken at any one time. +;; This mechanism should also be slightly better for the sh4-200. 
+ +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "arith_operand" "") + (match_operand:DI 2 "arith_operand" "")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_dup 4)) + (clobber (reg:SI T_REG))] + "TARGET_CBRANCHDI4 || TARGET_SH2 || TARGET_SHMEDIA" +{ + enum rtx_code comparison; + + if (TARGET_SHMEDIA) + { + emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + else if (!TARGET_CBRANCHDI4) + { + sh_emit_compare_and_branch (operands, DImode); + DONE; + } + else + { + if (expand_cbranchdi4 (operands, LAST_AND_UNUSED_RTX_CODE)) + DONE; + + comparison = prepare_cbranch_operands (operands, DImode, + LAST_AND_UNUSED_RTX_CODE); + if (comparison != GET_CODE (operands[0])) + operands[0] + = gen_rtx_fmt_ee (comparison, VOIDmode, operands[1], operands[2]); + operands[4] = gen_rtx_SCRATCH (SImode); + } +}) + +(define_insn_and_split "cbranchdi4_i" + [(set (pc) + (if_then_else (match_operator 0 "comparison_operator" + [(match_operand:DI 1 "arith_operand" "r,r") + (match_operand:DI 2 "arith_operand" "rN,I08")]) + (label_ref (match_operand 3 "" "")) + (pc))) + (clobber (match_scratch:SI 4 "=X,&r")) + (clobber (reg:SI T_REG))] + "TARGET_CBRANCHDI4" + "#" + "&& reload_completed" + [(pc)] +{ + if (!expand_cbranchdi4 (operands, GET_CODE (operands[0]))) + FAIL; + DONE; +}) + +;; ------------------------------------------------------------------------- +;; DImode signed integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "" + [(set (reg:SI T_REG) + (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_operand" "r")) + (const_int 0)))] + "TARGET_SH1" +{ + return output_branchy_insn (EQ, "tst\t%S1,%S0;bf\t%l9;tst\t%R1,%R0", + insn, operands); +} + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_insn "cmpeqdi_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))] + "TARGET_SH1" +{ + static const char* alt[] = + { + "tst %S0,%S0" "\n" + " bf 0f" "\n" + " tst %R0,%R0" "\n" + "0:", + + "cmp/eq %S1,%S0" "\n" + " bf 0f" "\n" + " cmp/eq %R1,%R0" "\n" + "0:" + }; + return alt[which_alternative]; +} + [(set_attr "length" "6") + (set_attr "type" "arith3b")]) + +(define_split + [(set (reg:SI T_REG) + (eq:SI (match_operand:DI 0 "arith_reg_operand" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "")))] +;; If we applied this split when not optimizing, it would only be +;; applied during the machine-dependent reorg, when no new basic blocks +;; may be created. + "TARGET_SH1 && reload_completed && optimize" + [(set (reg:SI T_REG) (eq:SI (match_dup 2) (match_dup 3))) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 6)) + (pc))) + (set (reg:SI T_REG) (eq:SI (match_dup 4) (match_dup 5))) + (match_dup 6)] +{ + operands[2] = gen_highpart (SImode, operands[0]); + operands[3] = operands[1] == const0_rtx + ? 
const0_rtx + : gen_highpart (SImode, operands[1]); + operands[4] = gen_lowpart (SImode, operands[0]); + operands[5] = gen_lowpart (SImode, operands[1]); + operands[6] = gen_label_rtx (); +}) + +(define_insn "cmpgtdi_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" +{ + static const char* alt[] = + { + "cmp/eq %S1,%S0" "\n" + " bf{.|/}s 0f" "\n" + " cmp/gt %S1,%S0" "\n" + " cmp/hi %R1,%R0" "\n" + "0:", + + "tst %S0,%S0" "\n" + " bf{.|/}s 0f" "\n" + " cmp/pl %S0" "\n" + " cmp/hi %S0,%R0" "\n" + "0:" + }; + return alt[which_alternative]; +} + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgedi_t" + [(set (reg:SI T_REG) + (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r") + (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))] + "TARGET_SH2" +{ + static const char* alt[] = + { + "cmp/eq %S1,%S0" "\n" + " bf{.|/}s 0f" "\n" + " cmp/ge %S1,%S0" "\n" + " cmp/hs %R1,%R0" "\n" + "0:", + + "cmp/pz %S0" + }; + return alt[which_alternative]; +} + [(set_attr "length" "8,2") + (set_attr "type" "arith3,mt_group")]) + +;; ------------------------------------------------------------------------- +;; DImode unsigned integer comparisons +;; ------------------------------------------------------------------------- + +(define_insn "cmpgeudi_t" + [(set (reg:SI T_REG) + (geu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" +{ + return "cmp/eq %S1,%S0" "\n" + " bf{.|/}s 0f" "\n" + " cmp/hs %S1,%S0" "\n" + " cmp/hs %R1,%R0" "\n" + "0:"; +} + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpgtudi_t" + [(set (reg:SI T_REG) + (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r") + (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" +{ + return "cmp/eq %S1,%S0" "\n" + " bf{.|/}s 0f" "\n" + " cmp/hi %S1,%S0" "\n" + " cmp/hi %R1,%R0" "\n" + "0:"; +} + [(set_attr "length" "8") + (set_attr "type" "arith3")]) + +(define_insn "cmpeqsi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:SI 1 "logical_operand" "%r") + (match_operand:SI 2 "cmp_operand" "Nr")))] + "TARGET_SHMEDIA" + "cmpeq %1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpeqdi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:DI 1 "register_operand" "%r") + (match_operand:DI 2 "cmp_operand" "Nr")))] + "TARGET_SHMEDIA" + "cmpeq %1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtsi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:SI 1 "cmp_operand" "Nr") + (match_operand:SI 2 "cmp_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgt %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtdi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgt %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtusi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gtu:SI (match_operand:SI 1 "cmp_operand" "Nr") + (match_operand:SI 2 "cmp_operand" "rN")))] + "TARGET_SHMEDIA" + "cmpgtu %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +(define_insn "cmpgtudi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gtu:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr") + (match_operand:DI 2 "arith_reg_or_0_operand" 
"rN")))] + "TARGET_SHMEDIA" + "cmpgtu %N1, %N2, %0" + [(set_attr "type" "cmp_media")]) + +; This pattern is for combine. +(define_insn "*cmpne0sisi_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (ne:SI (match_operand:SI 1 "arith_reg_operand" "r") (const_int 0)))] + "TARGET_SHMEDIA" + "cmpgtu %1,r63,%0" + [(set_attr "type" "cmp_media")]) + +;; ------------------------------------------------------------------------- +;; Conditional move instructions +;; ------------------------------------------------------------------------- + +;; The insn names may seem reversed, but note that cmveq performs the move +;; if op1 == 0, and cmvne does it if op1 != 0. + +(define_insn "movdicc_false" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (if_then_else:DI (eq (match_operand:DI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:DI 2 "arith_reg_or_0_operand" "rN") + (match_operand:DI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmveq %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "movdicc_true" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (if_then_else:DI (ne (match_operand:DI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:DI 2 "arith_reg_or_0_operand" "rN") + (match_operand:DI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmvne %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_peephole2 + [(set (match_operand:DI 0 "arith_reg_dest" "") + (if_then_else:DI (match_operator 3 "equality_comparison_operator" + [(match_operand:DI 1 "arith_reg_operand" "") + (const_int 0)]) + (match_operand:DI 2 "arith_reg_dest" "") + (match_dup 0))) + (set (match_dup 2) (match_dup 0))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (if_then_else:DI (match_dup 3) (match_dup 0) (match_dup 2)))] +{ + operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])), + VOIDmode, operands[1], CONST0_RTX (DImode)); +}) + +(define_peephole2 + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "")) + (set (match_operand:DI 2 "arith_reg_dest" "") + (if_then_else:DI (match_operator 4 "equality_comparison_operator" + [(match_operand:DI 3 "arith_reg_operand" "") + (const_int 0)]) + (match_dup 0) + (match_dup 2)))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (if_then_else:DI (match_dup 4) (match_dup 1) (match_dup 2)))] + "") + +(define_expand "movdicc" + [(set (match_operand:DI 0 "register_operand" "") + (if_then_else:DI (match_operand 1 "comparison_operator" "") + (match_operand:DI 2 "register_operand" "") + (match_operand:DI 3 "register_operand" "")))] + "TARGET_SHMEDIA" +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == DImode + && XEXP (operands[1], 1) == const0_rtx) + ; + else + { + if (!can_create_pseudo_p ()) + FAIL; + + operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]), + GET_CODE (operands[1]), + XEXP (operands[1], 0), + XEXP (operands[1], 1)); + if (!operands[1]) + FAIL; + } +}) + +;; Add SImode variants for cmveq / cmvne to compensate for not promoting +;; SImode to DImode. 
+(define_insn "movsicc_false" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (if_then_else:SI (eq (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:SI 2 "arith_reg_or_0_operand" "rN") + (match_operand:SI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmveq %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "movsicc_true" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (if_then_else:SI (ne (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:SI 2 "arith_reg_or_0_operand" "rN") + (match_operand:SI 3 "arith_reg_operand" "0")))] + "TARGET_SHMEDIA" + "cmvne %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (if_then_else:SI (match_operator 3 "equality_comparison_operator" + [(match_operand:SI 1 "arith_reg_operand" "") + (const_int 0)]) + (match_operand:SI 2 "arith_reg_dest" "") + (match_dup 0))) + (set (match_dup 2) (match_dup 0))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (if_then_else:SI (match_dup 3) (match_dup 0) (match_dup 2)))] +{ + operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])), + VOIDmode, operands[1], CONST0_RTX (SImode)); +}) + +(define_peephole2 + [(set (match_operand:SI 0 "general_movdst_operand" "") + (match_operand:SI 1 "arith_reg_or_0_operand" "")) + (set (match_operand:SI 2 "arith_reg_dest" "") + (if_then_else:SI (match_operator 4 "equality_comparison_operator" + [(match_operand:SI 3 "arith_reg_operand" "") + (const_int 0)]) + (match_dup 0) + (match_dup 2)))] + "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0]) + && (!REG_P (operands[1]) || GENERAL_REGISTER_P (REGNO (operands[1])))" + [(set (match_dup 2) + (if_then_else:SI (match_dup 4) (match_dup 1) (match_dup 2)))] +{ + replace_rtx (operands[4], operands[0], operands[1]); +}) + +(define_peephole2 + [(set (match_operand 0 "any_register_operand" "") + (match_operand 1 "any_register_operand" "")) + (set (match_operand 2 "any_register_operand" "") (match_operand 3 "" "")) + (set (match_operand 4 "" "") (match_operand 5 "" ""))] + "(HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[2])) + <= HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[0]))) + && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[2]) + && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[0]) + && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[2]) + && ! reg_overlap_mentioned_p (operands[0], operands[3]) + && ! reg_overlap_mentioned_p (operands[2], operands[0]) + && ! reg_overlap_mentioned_p (operands[0], operands[1]) + && (REGNO_REG_CLASS (REGNO (operands[0])) + == REGNO_REG_CLASS (REGNO (operands[2]))) + && (REGNO_REG_CLASS (REGNO (operands[1])) + == REGNO_REG_CLASS (REGNO (operands[0])))" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + rtx set1, set2, insn2; + rtx replacements[4]; + + /* We want to replace occurrences of operands[0] with operands[1] and + operands[2] with operands[0] in operands[4]/operands[5]. + Doing just two replace_rtx calls naively would result in the second + replacement undoing all that the first did if operands[1] and operands[2] + are identical, so we must do this simultaneously. 
*/ + replacements[0] = operands[0]; + replacements[1] = operands[1]; + replacements[2] = operands[2]; + replacements[3] = operands[0]; + if (!replace_n_hard_rtx (operands[5], replacements, 2, 0) + || !replace_n_hard_rtx (operands[4], replacements, 2, 0) + || !replace_n_hard_rtx (operands[2], replacements, 2, 0)) + FAIL; + + operands[5] = replace_n_hard_rtx (operands[5], replacements, 2, 1); + replace_n_hard_rtx (operands[4], replacements, 2, 1); + operands[2] = replace_n_hard_rtx (operands[2], replacements, 2, 1); + /* The operands array is aliased to recog_data.operand, which gets + clobbered by extract_insn, so finish with it now. */ + set1 = gen_rtx_SET (VOIDmode, operands[2], operands[3]); + set2 = gen_rtx_SET (VOIDmode, operands[4], operands[5]); + /* ??? The last insn might be a jump insn, but the generic peephole2 code + always uses emit_insn. */ + /* Check that we don't violate matching constraints or earlyclobbers. */ + extract_insn (emit_insn (set1)); + if (! constrain_operands (1)) + goto failure; + insn2 = emit (set2); + if (GET_CODE (insn2) == BARRIER) + goto failure; + extract_insn (insn2); + if (! constrain_operands (1)) + { + rtx tmp; + failure: + tmp = replacements[0]; + replacements[0] = replacements[1]; + replacements[1] = tmp; + tmp = replacements[2]; + replacements[2] = replacements[3]; + replacements[3] = tmp; + replace_n_hard_rtx (SET_DEST (set1), replacements, 2, 1); + replace_n_hard_rtx (SET_DEST (set2), replacements, 2, 1); + replace_n_hard_rtx (SET_SRC (set2), replacements, 2, 1); + FAIL; + } + DONE; +}) + +;; The register allocator is rather clumsy in handling multi-way conditional +;; moves, so allow the combiner to make them, and we split them up after +;; reload. */ +(define_insn_and_split "*movsicc_umin" + [(set (match_operand:SI 0 "arith_reg_dest" "=&r") + (umin:SI (if_then_else:SI + (eq (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 0)) + (match_operand:SI 2 "arith_reg_or_0_operand" "rN") + (match_operand:SI 3 "register_operand" "0")) + (match_operand:SI 4 "arith_reg_or_0_operand" "r"))) + (clobber (match_scratch:SI 5 "=&r"))] + "TARGET_SHMEDIA && !can_create_pseudo_p ()" + "#" + "TARGET_SHMEDIA && reload_completed" + [(pc)] +{ + emit_insn (gen_movsicc_false (operands[0], operands[1], operands[2], + operands[3])); + emit_insn (gen_cmpgtusi_media (operands[5], operands[4], operands[0])); + emit_insn (gen_movsicc_false (operands[0], operands[5], operands[4], + operands[0])); + DONE; +}) + +(define_insn "*movsicc_t_false" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (match_operand:SI 1 "general_movsrc_operand" "r,I08") + (match_operand:SI 2 "arith_reg_operand" "0,0")))] + "TARGET_PRETEND_CMOVE + && (arith_reg_operand (operands[1], SImode) + || (immediate_operand (operands[1], SImode) + && satisfies_constraint_I08 (operands[1])))" +{ + return "bt 0f" "\n" + " mov %1,%0" "\n" + "0:"; +} + [(set_attr "type" "mt_group,arith") ;; poor approximation + (set_attr "length" "4")]) + +(define_insn "*movsicc_t_true" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (match_operand:SI 1 "general_movsrc_operand" "r,I08") + (match_operand:SI 2 "arith_reg_operand" "0,0")))] + "TARGET_PRETEND_CMOVE + && (arith_reg_operand (operands[1], SImode) + || (immediate_operand (operands[1], SImode) + && satisfies_constraint_I08 (operands[1])))" +{ + return "bf 0f" "\n" + " mov %1,%0" "\n" + "0:"; +} + [(set_attr "type" "mt_group,arith") ;; poor approximation + 
(set_attr "length" "4")]) + +(define_expand "movsicc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (if_then_else:SI (match_operand 1 "comparison_operator" "") + (match_operand:SI 2 "arith_reg_or_0_operand" "") + (match_operand:SI 3 "arith_reg_operand" "")))] + "TARGET_SHMEDIA || TARGET_PRETEND_CMOVE" +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == SImode + && (TARGET_SHMEDIA + || (REG_P (XEXP (operands[1], 0)) + && REGNO (XEXP (operands[1], 0)) == T_REG)) + && XEXP (operands[1], 1) == const0_rtx) + ; + + else if (TARGET_PRETEND_CMOVE) + { + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + rtx op0 = XEXP (operands[1], 0); + rtx op1 = XEXP (operands[1], 1); + + if (! currently_expanding_to_rtl) + FAIL; + switch (code) + { + case LT: case LE: case LEU: case LTU: + if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_INT) + break; + case NE: + new_code = reverse_condition (code); + break; + case EQ: case GT: case GE: case GEU: case GTU: + break; + default: + FAIL; + } + sh_emit_scc_to_t (new_code, op0, op1); + operands[1] = gen_rtx_fmt_ee (new_code == code ? NE : EQ, VOIDmode, + gen_rtx_REG (SImode, T_REG), const0_rtx); + } + else + { + if (!can_create_pseudo_p ()) + FAIL; + + operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]), + GET_CODE (operands[1]), + XEXP (operands[1], 0), + XEXP (operands[1], 1)); + if (!operands[1]) + FAIL; + } +}) + +(define_expand "movqicc" + [(set (match_operand:QI 0 "register_operand" "") + (if_then_else:QI (match_operand 1 "comparison_operator" "") + (match_operand:QI 2 "register_operand" "") + (match_operand:QI 3 "register_operand" "")))] + "TARGET_SHMEDIA" +{ + operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0); + operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); + operands[3] = simplify_gen_subreg (SImode, operands[3], QImode, 0); + emit (gen_movsicc (operands[0], operands[1], operands[2], operands[3])); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Addition instructions +;; ------------------------------------------------------------------------- + +(define_expand "adddi3" + [(set (match_operand:DI 0 "arith_reg_operand") + (plus:DI (match_operand:DI 1 "arith_reg_operand") + (match_operand:DI 2 "arith_operand")))] + "" +{ + if (TARGET_SH1) + { + operands[2] = force_reg (DImode, operands[2]); + emit_insn (gen_adddi3_compact (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "*adddi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "arith_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + add %1, %2, %0 + addi %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*adddisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r,r") 0) + (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "arith_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + add.l %1, %2, %0 + addi.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "adddi3z_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extend:DI + (plus:SI (match_operand:SI 1 "extend_reg_operand" "r") + (match_operand:SI 2 "extend_reg_or_0_operand" "rN"))))] + "TARGET_SHMEDIA" + "addz.l %1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn_and_split "adddi3_compact" + [(set 
(match_operand:DI 0 "arith_reg_dest") + (plus:DI (match_operand:DI 1 "arith_reg_operand") + (match_operand:DI 2 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + emit_insn (gen_clrt ()); + emit_insn (gen_addc (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_addc (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]), + gen_highpart (SImode, operands[2]))); + DONE; +}) + +(define_insn "addc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI T_REG))) + (set (reg:SI T_REG) + (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))] + "TARGET_SH1" + "addc %2,%0" + [(set_attr "type" "arith")]) + +;; A simplified version of the addc insn, where the exact value of the +;; T bit doesn't matter. This is easier for combine to pick up. +;; We allow a reg or 0 for one of the operands in order to be able to +;; do 'reg + T' sequences. Reload will load the constant 0 into the reg +;; as needed. +(define_insn "*addc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "arith_reg_or_0_operand" "r")) + (match_operand:SI 3 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "addc %2,%0" + [(set_attr "type" "arith")]) + +;; Split 'reg + reg + 1' into a sett addc sequence, as it can be scheduled +;; better, if the sett insn can be done early. +(define_insn_and_split "*addc_r_r_1" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (const_int 1)) + (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 2)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +;; Left shifts by one are usually done with an add insn to avoid T_REG +;; clobbers. Thus addc can also be used to do something like '(x << 1) + 1'. +(define_insn_and_split "*addc_2r_1" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 2)) + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (const_int 1)) + (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 1)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +;; Sometimes combine will try to do 'reg + (0-reg) + 1' if the *addc pattern +;; matched. Split this up into a simple sub add sequence, as this will save +;; us one sett insn. +(define_insn_and_split "*minus_plus_one" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")) + (const_int 1)))] + "TARGET_SH1" + "#" + "&& 1" + [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))]) + +;; Split 'reg + T' into 'reg + 0 + T' to utilize the addc insn. 
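+;; (Illustrative sketch, names made up: in C such as +;;   int f (int x, int a, int b) { return x + (a == b); } +;; the comparison result typically sits in the T bit, which is exactly the +;; 'reg + T' shape this split targets.)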
+;; If the 0 constant can be CSE-ed, this becomes a one instruction +;; operation, as opposed to sequences such as +;; movt r2 +;; add r2,r3 +;; +;; Even if the constant is not CSE-ed, a sequence such as +;; mov #0,r2 +;; addc r2,r3 +;; can be scheduled much better since the load of the constant can be +;; done earlier, before any comparison insns that store the result in +;; the T bit. +(define_insn_and_split "*addc_r_1" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (match_operand:SI 1 "t_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (plus:SI (plus:SI (match_dup 2) (const_int 0)) + (match_dup 1))) + (clobber (reg:SI T_REG))])]) + +;; Use shlr-addc to do 'reg + (reg & 1)'. +(define_insn_and_split "*addc_r_lsb" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 1)) + (match_operand:SI 2 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) (plus:SI (reg:SI T_REG) (match_dup 2))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[1])); +}) + +;; Use shlr-addc to do 'reg + reg + (reg & 1)'. +(define_insn_and_split "*addc_r_r_lsb" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 1)) + (match_operand:SI 2 "arith_reg_operand")) + (match_operand:SI 3 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[1])); +}) + +;; Canonicalize 'reg + (reg & 1) + reg' into 'reg + reg + (reg & 1)'. +(define_insn_and_split "*addc_r_lsb_r" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 1)) + (plus:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "arith_reg_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (plus:SI (plus:SI (and:SI (match_dup 1) (const_int 1)) + (match_dup 2)) + (match_dup 3))) + (clobber (reg:SI T_REG))])]) + +;; Canonicalize '2 * reg + (reg & 1)' into 'reg + reg + (reg & 1)'. +(define_insn_and_split "*addc_2r_lsb" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 1)) + (mult:SI (match_operand:SI 2 "arith_reg_operand") + (const_int 2)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (plus:SI (plus:SI (and:SI (match_dup 1) (const_int 1)) + (match_dup 2)) + (match_dup 2))) + (clobber (reg:SI T_REG))])]) + +;; Use shll-addc to do 'reg + ((unsigned int)reg >> 31)'. +(define_insn_and_split "*addc_r_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 31)) + (match_operand:SI 2 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) (plus:SI (reg:SI T_REG) (match_dup 2))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[1])); +}) + +;; Use shll-addc to do 'reg + reg + ((unsigned int)reg >> 31)'. 
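+;; (Illustrative sketch, names made up: for C like +;;   unsigned int f (unsigned int a, unsigned int b, unsigned int c) +;;   { return a + b + (c >> 31); } +;; a shll on a scratch copy of c leaves its MSB in T, and a single addc +;; then folds T into the a + b sum.)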
+(define_insn_and_split "*addc_r_r_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (plus:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 31)) + (match_operand:SI 2 "arith_reg_operand")) + (match_operand:SI 3 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[1])); +}) + +;; Canonicalize '2 * reg + ((unsigned int)reg >> 31)' +;; into 'reg + reg + (reg & 1)'. +(define_insn_and_split "*addc_2r_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand") + (const_int 2)) + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (const_int 31)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (plus:SI (plus:SI (lshiftrt:SI (match_dup 2) (const_int 31)) + (match_dup 1)) + (match_dup 1))) + (clobber (reg:SI T_REG))])]) + +(define_expand "addsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (plus:SI (match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_operand" "")))] + "" +{ + if (TARGET_SHMEDIA) + operands[1] = force_reg (SImode, operands[1]); +}) + +(define_insn "addsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (plus:SI (match_operand:SI 1 "extend_reg_operand" "%r,r") + (match_operand:SI 2 "arith_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + add.l %1, %2, %0 + addi.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "addsidi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (sign_extend:DI (plus:SI (match_operand:SI 1 "extend_reg_operand" + "%r,r") + (match_operand:SI 2 "arith_operand" + "r,I10"))))] + "TARGET_SHMEDIA" + "@ + add.l %1, %2, %0 + addi.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "*addsi3_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (match_operand:SI 1 "arith_operand" "%0") + (match_operand:SI 2 "arith_operand" "rI08")))] + "TARGET_SH1" + "add %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Subtraction instructions +;; ------------------------------------------------------------------------- + +(define_expand "subdi3" + [(set (match_operand:DI 0 "arith_reg_operand" "") + (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "") + (match_operand:DI 2 "arith_reg_operand" "")))] + "" +{ + if (TARGET_SH1) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_subdi3_compact (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "*subdi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "sub %N1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "subdisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0) + (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "sub.l %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn_and_split "subdi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest") + (minus:DI 
(match_operand:DI 1 "arith_reg_operand") + (match_operand:DI 2 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + emit_insn (gen_clrt ()); + emit_insn (gen_subc (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]), + gen_lowpart (SImode, operands[2]))); + emit_insn (gen_subc (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]), + gen_highpart (SImode, operands[2]))); + DONE; +}) + +(define_insn "subc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")) + (reg:SI T_REG))) + (set (reg:SI T_REG) + (gtu:SI (minus:SI (minus:SI (match_dup 1) (match_dup 2)) + (reg:SI T_REG)) + (match_dup 1)))] + "TARGET_SH1" + "subc %2,%0" + [(set_attr "type" "arith")]) + +;; A simplified version of the subc insn, where the exact value of the +;; T bit doesn't matter. This is easier for combine to pick up. +;; We allow a reg or 0 for one of the operands in order to be able to +;; do 'reg - T' sequences. Reload will load the constant 0 into the reg +;; as needed. +(define_insn "*subc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_or_0_operand" "r")) + (match_operand:SI 3 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "subc %2,%0" + [(set_attr "type" "arith")]) + +;; Split reg - reg - 1 into a sett subc sequence, as it can be scheduled +;; better, if the sett insn can be done early. +;; Notice that combine turns 'a - b - 1' into 'a + (~b)'. +(define_insn_and_split "*subc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (plus:SI (not:SI (match_operand:SI 1 "arith_reg_operand" "")) + (match_operand:SI 2 "arith_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (reg:SI T_REG) (const_int 1)) + (parallel [(set (match_dup 0) + (minus:SI (minus:SI (match_dup 2) (match_dup 1)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +;; Split 'reg - T' into 'reg - 0 - T' to utilize the subc insn. +;; If the 0 constant can be CSE-ed, this becomes a one instruction +;; operation, as opposed to sequences such as +;; movt r2 +;; sub r2,r3 +;; +;; Even if the constant is not CSE-ed, a sequence such as +;; mov #0,r2 +;; subc r2,r3 +;; can be scheduled much better since the load of the constant can be +;; done earlier, before any comparison insns that store the result in +;; the T bit. 
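+;; (Illustrative sketch: an expression like 'x - (a < b)' leaves the +;; comparison result in T, so with a zero register at hand the whole +;; subtraction becomes a single subc.)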
+(define_insn_and_split "*subc" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (minus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(parallel [(set (match_dup 0) + (minus:SI (minus:SI (match_dup 1) (const_int 0)) + (match_dup 2))) + (clobber (reg:SI T_REG))])]) + +(define_insn "*subsi3_internal" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "r")))] + "TARGET_SH1" + "sub %2,%0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "*subsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (match_operand:SI 1 "minuend_operand" "rN") + (match_operand:SI 2 "extend_reg_operand" "r")))] + "TARGET_SHMEDIA + && (operands[1] != constm1_rtx + || (GET_CODE (operands[2]) != TRUNCATE + && GET_CODE (operands[2]) != SUBREG))" + "sub.l %N1, %2, %0" + "operands[1] == constm1_rtx" + [(set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))] + "" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1 + "general_extend_operand" + "") 0)) 0)))] + "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN" + [(set (match_dup 0) (zero_extend:SI (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))] + "") + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1 + "general_extend_operand" + "") 0)) 3)))] + "TARGET_SHMEDIA && TARGET_BIG_ENDIAN" + [(set (match_dup 0) (zero_extend:SI (match_dup 1))) + (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))] + "") + +;; Convert +;; constant - reg +;; to +;; neg reg +;; add reg, #const +;; since this will sometimes save one instruction. +;; Otherwise we might get a sequence like +;; mov #const, rY +;; sub rY, rX +;; mov rX, rY +;; if the source and dest regs are the same. +(define_expand "subsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (minus:SI (match_operand:SI 1 "arith_operand" "") + (match_operand:SI 2 "arith_reg_operand" "")))] + "" +{ + if (TARGET_SH1 && CONST_INT_P (operands[1])) + { + emit_insn (gen_negsi2 (operands[0], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[0], operands[1])); + DONE; + } + if (TARGET_SHMEDIA) + { + if (!can_create_pseudo_p () + && ! arith_reg_or_0_operand (operands[1], SImode)) + FAIL; + if (operands[1] != const0_rtx && GET_CODE (operands[1]) != SUBREG) + operands[1] = force_reg (SImode, operands[1]); + } +}) + +;; ------------------------------------------------------------------------- +;; Division instructions +;; ------------------------------------------------------------------------- + +;; We take advantage of the library routines which don't clobber as many +;; registers as a normal function call would. + +;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it +;; also has an effect on the register that holds the address of the sfunc. +;; To make this work, we have an extra dummy insn that shows the use +;; of this register for reorg. 
+ +(define_insn "use_sfunc_addr" + [(set (reg:SI PR_REG) + (unspec:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_SFUNC))] + "TARGET_SH1 && check_use_sfunc_addr (insn, operands[0])" + "" + [(set_attr "length" "0")]) + +(define_insn "udivsi3_sh2a" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (udiv:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "z")))] + "TARGET_SH2A" + "divu %2,%1" + [(set_attr "type" "arith") + (set_attr "in_delay_slot" "no")]) + +;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than +;; hard register 0. If we used hard register 0, then the next instruction +;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg +;; gets allocated to a stack slot that needs its address reloaded, then +;; there is nothing to prevent reload from using r0 to reload the address. +;; This reload would clobber the value in r0 we are trying to store. +;; If we let reload allocate r0, then this problem can never happen. +(define_insn "udivsi3_i1" + [(set (match_operand:SI 0 "register_operand" "=z") + (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R4_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +; Since shmedia-nofpu code could be linked against shcompact code, and +; the udivsi3 libcall has the same name, we must consider all registers +; clobbered that are in the union of the registers clobbered by the +; shmedia and the shcompact implementation. Note, if the shcompact +; implementation actually used shcompact code, we'd need to clobber +; also r23 and fr23. +(define_insn "udivsi3_i1_media" + [(set (match_operand:SI 0 "register_operand" "=z") + (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_MEDIA_REG)) + (clobber (reg:SI PR_MEDIA_REG)) + (clobber (reg:SI R20_REG)) + (clobber (reg:SI R21_REG)) + (clobber (reg:SI R22_REG)) + (clobber (reg:DI TR0_REG)) + (clobber (reg:DI TR1_REG)) + (clobber (reg:DI TR2_REG)) + (use (match_operand 1 "target_reg_operand" "b"))] + "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! 
TARGET_DIVIDE_FP)" + "blink %1, r18" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "udivsi3_i4_media" + [(set (match_dup 3) + (zero_extend:DI (match_operand:SI 1 "register_operand" ""))) + (set (match_dup 4) + (zero_extend:DI (match_operand:SI 2 "register_operand" ""))) + (set (match_dup 5) (float:DF (match_dup 3))) + (set (match_dup 6) (float:DF (match_dup 4))) + (set (match_dup 7) (div:DF (match_dup 5) (match_dup 6))) + (set (match_dup 8) (fix:DI (match_dup 7))) + (set (match_operand:SI 0 "register_operand" "") + (truncate:SI (match_dup 8)))] + "TARGET_SHMEDIA_FPU" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DFmode); + operands[6] = gen_reg_rtx (DFmode); + operands[7] = gen_reg_rtx (DFmode); + operands[8] = gen_reg_rtx (DImode); +}) + +(define_insn "udivsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:DF DR0_REG)) + (clobber (reg:DF DR2_REG)) + (clobber (reg:DF DR4_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (use (reg:PSI FPSCR_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "fp_mode" "double") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:DF DR0_REG)) + (clobber (reg:DF DR2_REG)) + (clobber (reg:DF DR4_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT) + && TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4_int" + [(set (match_operand:SI 0 "register_operand" "=z") + (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + + +(define_expand "udivsi3" + [(set (match_dup 3) (symbol_ref:SI "__udivsi3")) + (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" "")) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (udiv:SI (reg:SI R4_REG) + (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R4_REG)) + (use (match_dup 3))])] + "" +{ + rtx last; + + operands[3] = gen_reg_rtx (Pmode); + /* Emit the move of the address to a pseudo outside of the libcall. */ + if (TARGET_DIVIDE_CALL_TABLE) + { + /* libgcc2:__udivmoddi4 is not supposed to use an actual division, since + that causes problems when the divide code is supposed to come from a + separate library. Division by zero is undefined, so dividing 1 can be + implemented by comparing with the divisor. 
*/ + if (operands[1] == const1_rtx && currently_expanding_to_rtl) + { + rtx test = gen_rtx_GEU (VOIDmode, operands[1], operands[2]); + emit_insn (gen_cstoresi4 (operands[0], test, + operands[1], operands[2])); + DONE; + } + else if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[2]); + DONE; + } + function_symbol (operands[3], "__udivsi3_i4i", SFUNC_GOT); + last = gen_udivsi3_i4_int (operands[0], operands[3]); + } + else if (TARGET_DIVIDE_CALL_FP) + { + function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC); + if (TARGET_FPU_SINGLE) + last = gen_udivsi3_i4_single (operands[0], operands[3]); + else + last = gen_udivsi3_i4 (operands[0], operands[3]); + } + else if (TARGET_SHMEDIA_FPU) + { + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_udivsi3_i4_media (operands[0], operands[1], operands[2])); + DONE; + } + else if (TARGET_SH2A) + { + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_udivsi3_sh2a (operands[0], operands[1], operands[2])); + DONE; + } + else if (TARGET_SH5) + { + function_symbol (operands[3], + TARGET_FPU_ANY ? "__udivsi3_i4" : "__udivsi3", + SFUNC_STATIC); + + if (TARGET_SHMEDIA) + last = gen_udivsi3_i1_media (operands[0], operands[3]); + else if (TARGET_FPU_ANY) + last = gen_udivsi3_i4_single (operands[0], operands[3]); + else + last = gen_udivsi3_i1 (operands[0], operands[3]); + } + else + { + function_symbol (operands[3], "__udivsi3", SFUNC_STATIC); + last = gen_udivsi3_i1 (operands[0], operands[3]); + } + emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); + emit_insn (last); + DONE; +}) + +(define_insn "divsi3_sh2a" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (div:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "z")))] + "TARGET_SH2A" + "divs %2,%1" + [(set_attr "type" "arith") + (set_attr "in_delay_slot" "no")]) + +(define_insn "divsi3_i1" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i1_media" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_MEDIA_REG)) + (clobber (reg:SI PR_MEDIA_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R20_REG)) + (clobber (reg:SI R21_REG)) + (clobber (reg:SI TR0_REG)) + (use (match_operand 1 "target_reg_operand" "b"))] + "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)" + "blink %1, r18" + [(set_attr "type" "sfunc")]) + +(define_insn "divsi3_media_2" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_MEDIA_REG)) + (clobber (reg:SI PR_MEDIA_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R21_REG)) + (clobber (reg:SI TR0_REG)) + (use (reg:SI R20_REG)) + (use (match_operand 1 "target_reg_operand" "b"))] + "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! 
TARGET_DIVIDE_FP)" + "blink %1, r18" + [(set_attr "type" "sfunc")]) + +;; This pattern acts as a placeholder for -mdiv=inv:call to carry +;; hard reg clobbers and data dependencies that we need when we want +;; to rematerialize the division into a call. +(define_insn_and_split "divsi_inv_call" + [(set (match_operand:SI 0 "register_operand" "=r") + (div:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI T_MEDIA_REG)) + (clobber (reg:SI PR_MEDIA_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R21_REG)) + (clobber (reg:SI TR0_REG)) + (clobber (reg:SI R20_REG)) + (use (match_operand:SI 3 "register_operand" "r"))] + "TARGET_SHMEDIA" + "#" + "&& (reload_in_progress || reload_completed)" + [(set (match_dup 0) (match_dup 3))] + "" + [(set_attr "highpart" "must_split")]) + +;; This is the combiner pattern for -mdiv=inv:call . +(define_insn_and_split "*divsi_inv_call_combine" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r"))) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI T_MEDIA_REG)) + (clobber (reg:SI PR_MEDIA_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R21_REG)) + (clobber (reg:SI TR0_REG)) + (clobber (reg:SI R20_REG)) + (use (unspec:SI [(match_dup 1) + (match_operand:SI 3 "" "") + (unspec:SI [(match_operand:SI 4 "" "") + (match_dup 3) + (match_operand:DI 5 "" "")] + UNSPEC_DIV_INV_M2) + (match_operand:DI 6 "" "") + (const_int 0) + (const_int 0)] + UNSPEC_DIV_INV_M3))] + "TARGET_SHMEDIA" + "#" + "&& (reload_in_progress || reload_completed)" + [(pc)] +{ + const char *name = sh_divsi3_libfunc; + enum sh_function_kind kind = SFUNC_GOT; + rtx sym; + + emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R5_REG), operands[2]); + while (TARGET_DIVIDE_INV_CALL2) + { + rtx x = operands[3]; + + if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_DIV_INV_M1) + break; + x = XVECEXP (x, 0, 0); + name = "__sdivsi3_2"; + kind = SFUNC_STATIC; + emit_move_insn (gen_rtx_REG (DImode, R20_REG), x); + break; + } + sym = function_symbol (NULL, name, kind); + emit_insn (gen_divsi3_media_2 (operands[0], sym)); + DONE; +} + [(set_attr "highpart" "must_split")]) + +(define_expand "divsi3_i4_media" + [(set (match_dup 3) (float:DF (match_operand:SI 1 "register_operand" "r"))) + (set (match_dup 4) (float:DF (match_operand:SI 2 "register_operand" "r"))) + (set (match_dup 5) (div:DF (match_dup 3) (match_dup 4))) + (set (match_operand:SI 0 "register_operand" "=r") + (fix:SI (match_dup 5)))] + "TARGET_SHMEDIA_FPU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = gen_reg_rtx (DFmode); + operands[5] = gen_reg_rtx (DFmode); +}) + +(define_insn "divsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI PR_REG)) + (clobber (reg:DF DR0_REG)) + (clobber (reg:DF DR2_REG)) + (use (reg:PSI FPSCR_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_FPU_DOUBLE && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "fp_mode" "double") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI PR_REG)) + (clobber (reg:DF DR0_REG)) + (clobber (reg:DF DR2_REG)) + (clobber (reg:SI R2_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT) + && TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4_int" + [(set (match_operand:SI 0 "register_operand" "=z") + (div:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "divsi3" + [(set (match_dup 3) (symbol_ref:SI "__sdivsi3")) + (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" "")) + (parallel [(set (match_operand:SI 0 "register_operand" "") + (div:SI (reg:SI R4_REG) + (reg:SI R5_REG))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG)) + (use (match_dup 3))])] + "" +{ + rtx last; + + operands[3] = gen_reg_rtx (Pmode); + /* Emit the move of the address to a pseudo outside of the libcall. */ + if (TARGET_DIVIDE_CALL_TABLE) + { + function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT); + last = gen_divsi3_i4_int (operands[0], operands[3]); + } + else if (TARGET_DIVIDE_CALL_FP) + { + function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC); + if (TARGET_FPU_SINGLE) + last = gen_divsi3_i4_single (operands[0], operands[3]); + else + last = gen_divsi3_i4 (operands[0], operands[3]); + } + else if (TARGET_SH2A) + { + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_divsi3_sh2a (operands[0], operands[1], operands[2])); + DONE; + } + else if (TARGET_DIVIDE_INV) + { + rtx dividend = operands[1]; + rtx divisor = operands[2]; + rtx tab_base; + rtx nsb_res = gen_reg_rtx (DImode); + rtx norm64 = gen_reg_rtx (DImode); + rtx tab_ix = gen_reg_rtx (DImode); + rtx norm32 = gen_reg_rtx (SImode); + rtx i92 = force_reg (DImode, GEN_INT (92)); + rtx scratch0a = gen_reg_rtx (DImode); + rtx scratch0b = gen_reg_rtx (DImode); + rtx inv0 = gen_reg_rtx (SImode); + rtx scratch1a = gen_reg_rtx (DImode); + rtx scratch1b = gen_reg_rtx (DImode); + rtx shift = gen_reg_rtx (DImode); + rtx i2p27, i43; + rtx inv1 = gen_reg_rtx (SImode); + rtx scratch2a = gen_reg_rtx (DImode); + rtx scratch2b = gen_reg_rtx (SImode); + rtx inv2 = gen_reg_rtx (SImode); + rtx scratch3a = gen_reg_rtx (DImode); + rtx scratch3b = gen_reg_rtx (DImode); + rtx scratch3c = gen_reg_rtx (DImode); + rtx scratch3d = gen_reg_rtx (SImode); + rtx scratch3e = gen_reg_rtx (DImode); + rtx result = gen_reg_rtx (SImode); + + if (! arith_reg_or_0_operand (dividend, SImode)) + dividend = force_reg (SImode, dividend); + if (! 
arith_reg_operand (divisor, SImode)) + divisor = force_reg (SImode, divisor); + if (flag_pic && Pmode != DImode) + { + tab_base = gen_rtx_SYMBOL_REF (Pmode, "__div_table"); + tab_base = gen_datalabel_ref (tab_base); + tab_base = force_reg (DImode, gen_rtx_SIGN_EXTEND (DImode, tab_base)); + } + else + { + tab_base = gen_rtx_SYMBOL_REF (DImode, "__div_table"); + tab_base = gen_datalabel_ref (tab_base); + tab_base = force_reg (DImode, tab_base); + } + if (TARGET_DIVIDE_INV20U) + i2p27 = force_reg (DImode, GEN_INT (-2 << 27)); + else + i2p27 = GEN_INT (0); + if (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L) + i43 = force_reg (DImode, GEN_INT (43)); + else + i43 = GEN_INT (0); + emit_insn (gen_nsbdi (nsb_res, + simplify_gen_subreg (DImode, divisor, SImode, 0))); + emit_insn (gen_ashldi3_media (norm64, + gen_rtx_SUBREG (DImode, divisor, 0), + nsb_res)); + emit_insn (gen_ashrdi3_media (tab_ix, norm64, GEN_INT (58))); + emit_insn (gen_ashrdisi3_media_high (norm32, norm64, GEN_INT (32))); + emit_insn (gen_divsi_inv_m1 (inv1, tab_base, tab_ix, norm32, + inv0, scratch0a, scratch0b, + scratch1a, scratch1b)); + emit_insn (gen_subdi3 (shift, i92, nsb_res)); + emit_insn (gen_divsi_inv_m2 (inv2, norm32, inv1, i92, + scratch2a)); + emit_insn (gen_divsi_inv_m3 (result, dividend, inv1, inv2, shift, + i2p27, i43, + scratch3a, scratch3b, scratch3c, + scratch2a, scratch2b, scratch3d, scratch3e)); + if (TARGET_DIVIDE_INV_CALL || TARGET_DIVIDE_INV_CALL2) + emit_insn (gen_divsi_inv_call (operands[0], dividend, divisor, result)); + else if (TARGET_DIVIDE_INV_FP) + emit_insn (gen_divsi_inv_fp (operands[0], dividend, divisor, result, + gen_reg_rtx (SImode), gen_reg_rtx (SImode), + gen_reg_rtx (DFmode), gen_reg_rtx (DFmode), + gen_reg_rtx (DFmode))); + else + emit_move_insn (operands[0], result); + DONE; + } + else if (TARGET_SHMEDIA_FPU && TARGET_DIVIDE_FP) + { + operands[1] = force_reg (SImode, operands[1]); + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_divsi3_i4_media (operands[0], operands[1], operands[2])); + DONE; + } + else if (TARGET_SH5) + { + if (TARGET_DIVIDE_CALL2) + { + rtx tab_base = gen_rtx_SYMBOL_REF (Pmode, "__div_table"); + tab_base = gen_datalabel_ref (tab_base); + emit_move_insn (gen_rtx_REG (Pmode, R20_REG), tab_base); + } + if (TARGET_FPU_ANY && TARGET_SH1) + function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC); + else if (TARGET_DIVIDE_CALL2) + function_symbol (operands[3], "__sdivsi3_2", SFUNC_STATIC); + else + function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT); + + if (TARGET_SHMEDIA) + last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media) + (operands[0], operands[3])); + else if (TARGET_FPU_ANY) + last = gen_divsi3_i4_single (operands[0], operands[3]); + else + last = gen_divsi3_i1 (operands[0], operands[3]); + } + else + { + function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT); + last = gen_divsi3_i1 (operands[0], operands[3]); + } + emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); + emit_insn (last); + DONE; +}) + +;; operands: scratch, tab_base, tab_ix +;; These are unspecs because we could generate an indexed addressing mode +;; even if -m5-32media, where INDEX_REG_CLASS == NO_REGS, and this would +;; confuse reload. See PR27117. 
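+;; (Sketch only: the QImode lookup below behaves like +;;   scratch = ((unsigned char *) tab_base)[tab_ix];   /* zero-extended */ +;; and the HImode one like a sign-extended short load, but both are kept +;; opaque as unspecs so that no indexed address ever reaches reload.)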
+(define_insn "divsi_inv_qitable" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (unspec:QI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_DIV_INV_TABLE)))] + "TARGET_SHMEDIA" + "ldx.ub %1, %2, %0" + [(set_attr "type" "load_media") + (set_attr "highpart" "user")]) + +;; operands: scratch, tab_base, tab_ix +(define_insn "divsi_inv_hitable" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (unspec:HI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")] + UNSPEC_DIV_INV_TABLE)))] + "TARGET_SHMEDIA" + "ldx.w %1, %2, %0" + [(set_attr "type" "load_media") + (set_attr "highpart" "user")]) + +;; operands: inv0, tab_base, tab_ix, norm32 +;; scratch equiv in sdivsi3_2: r19, r21 +(define_expand "divsi_inv_m0" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r")] + UNSPEC_DIV_INV_M0)) + (clobber (match_operand:DI 4 "register_operand" "=r")) + (clobber (match_operand:DI 5 "register_operand" "=r"))] + "TARGET_SHMEDIA" +{ +/* +tab_base: r20 +tab_ix: r21 +norm32: r25 + ldx.ub r20, r21, r19 // u0.8 + shlli r21, 1, r21 + muls.l r25, r19, r19 // s2.38 + ldx.w r20, r21, r21 // s2.14 + shari r19, 24, r19 // truncate to s2.14 + sub r21, r19, r19 // some 11 bit inverse in s1.14 +*/ + + rtx inv0 = operands[0]; + rtx tab_base = operands[1]; + rtx tab_ix = operands[2]; + rtx norm32 = operands[3]; + rtx scratch0 = operands[4]; + rtx scratch0_si = gen_lowpart (SImode, scratch0); + rtx scratch1 = operands[5]; + + emit_insn (gen_divsi_inv_qitable (scratch0, tab_base, tab_ix)); + emit_insn (gen_ashldi3_media (scratch1, tab_ix, GEN_INT (1))); + emit_insn (gen_mulsidi3_media (scratch0, norm32, scratch0_si)); + emit_insn (gen_divsi_inv_hitable (scratch1, tab_base, scratch1)); + emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (24))); + emit_insn (gen_subdisi3_media (inv0, scratch1, scratch0)); + DONE; +}) + +;; operands: inv1, tab_base, tab_ix, norm32 +(define_insn_and_split "divsi_inv_m1" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r")] + UNSPEC_DIV_INV_M1)) + (clobber (match_operand:SI 4 "register_operand" "=r")) + (clobber (match_operand:DI 5 "register_operand" "=r")) + (clobber (match_operand:DI 6 "register_operand" "=r")) + (clobber (match_operand:DI 7 "register_operand" "=r")) + (clobber (match_operand:DI 8 "register_operand" "=r"))] + "TARGET_SHMEDIA" + "#" + "&& !can_create_pseudo_p ()" + [(pc)] +{ +/* inv0: r19 + muls.l r19, r19, r18 // u0.28 + muls.l r25, r18, r18 // s2.58 + shlli r19, 45, r0 // multiply by two and convert to s2.58 + sub r0, r18, r18 + shari r18, 28, r18 // some 18 bit inverse in s1.30 +*/ + + rtx inv1 = operands[0]; + rtx tab_base = operands[1]; + rtx tab_ix = operands[2]; + rtx norm32 = operands[3]; + rtx inv0 = operands[4]; + rtx inv0_di = simplify_gen_subreg (DImode, inv0, SImode, 0); + rtx scratch0a = operands[5]; + rtx scratch0b = operands[6]; + rtx scratch0 = operands[7]; + rtx scratch1 = operands[8]; + rtx scratch1_si = gen_lowpart (SImode, scratch1); + + emit_insn (gen_divsi_inv_m0 (inv0, tab_base, tab_ix, norm32, + scratch0a, scratch0b)); + emit_insn (gen_mulsidi3_media (scratch1, inv0, inv0)); + emit_insn (gen_mulsidi3_media (scratch1, 
norm32, scratch1_si)); + emit_insn (gen_ashldi3_media (scratch0, inv0_di, GEN_INT (45))); + emit_insn (gen_subdi3 (scratch1, scratch0, scratch1)); + emit_insn (gen_ashrdisi3_media_opaque (inv1, scratch1, GEN_INT (28))); + DONE; +}) + +;; operands: inv2, norm32, inv1, i92 +(define_insn_and_split "divsi_inv_m2" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r")] + UNSPEC_DIV_INV_M2)) + (clobber (match_operand:DI 4 "register_operand" "=r"))] + "TARGET_SHMEDIA" + "#" + "&& !can_create_pseudo_p ()" + [(pc)] +{ +/* + muls.l r18, r25, r0 // s2.60 + shari r0, 16, r0 // s-16.44 + sub + muls.l r0, r18, r19 // s-16.74 + shari r19, 30, r19 // s-16.44 +*/ + rtx inv2 = operands[0]; + rtx norm32 = operands[1]; + rtx inv1 = operands[2]; + rtx i92 = operands[3]; + rtx scratch0 = operands[4]; + rtx scratch0_si = gen_lowpart (SImode, scratch0); + + emit_insn (gen_mulsidi3_media (scratch0, inv1, norm32)); + emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (16))); + emit_insn (gen_subdi3 (scratch0, i92, scratch0)); + emit_insn (gen_mulsidi3_media (scratch0, scratch0_si, inv1)); + emit_insn (gen_ashrdisi3_media_opaque (inv2, scratch0, GEN_INT (30))); + DONE; +}) + +(define_insn_and_split "divsi_inv_m3" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r") + (match_operand:DI 4 "register_operand" "r") + (match_operand:DI 5 "arith_reg_or_0_operand" "rN") + (match_operand:DI 6 "arith_reg_or_0_operand" "rN")] + UNSPEC_DIV_INV_M3)) + (clobber (match_operand:DI 7 "register_operand" "=r")) + (clobber (match_operand:DI 8 "register_operand" "=r")) + (clobber (match_operand:DI 9 "register_operand" "=r")) + (clobber (match_operand:DI 10 "register_operand" "=r")) + (clobber (match_operand:SI 11 "register_operand" "=r")) + (clobber (match_operand:SI 12 "register_operand" "=r")) + (clobber (match_operand:DI 13 "register_operand" "=r"))] + "TARGET_SHMEDIA" + "#" + "&& !can_create_pseudo_p ()" + [(pc)] +{ +/* + r0: result r1: shift r4: dividend r18: inv1 r19: inv2 + r0: scratch0 r19: scratch1 r21: scratch2 + + muls.l r18, r4, r25 // s32.30 + muls.l r19, r4, r19 // s15.30 + shari r25, 63, r21 + shari r19, 14, r19 // s18.-14 + sub r25, r19, r0 + shard r0, r1, r0 + sub r0, r21, r0 +*/ + + rtx result = operands[0]; + rtx dividend = operands[1]; + rtx inv1 = operands[2]; + rtx inv2 = operands[3]; + rtx shift = operands[4]; + rtx scratch0 = operands[7]; + rtx scratch1 = operands[8]; + rtx scratch2 = operands[9]; + + if (satisfies_constraint_N (dividend)) + { + emit_move_insn (result, dividend); + DONE; + } + + emit_insn (gen_mulsidi3_media (scratch0, inv1, dividend)); + emit_insn (gen_mulsidi3_media (scratch1, inv2, dividend)); + emit_insn (gen_ashrdi3_media (scratch2, scratch0, GEN_INT (63))); + emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (14))); + emit_insn (gen_adddi3 (scratch0, scratch0, scratch1)); + emit_insn (gen_ashrdi3_media (scratch0, scratch0, shift)); + emit_insn (gen_subdisi3_media (result, scratch0, scratch2)); + DONE; +}) + +;; operands: quotient, dividend, inv1, inv2, shift, i2p27, i43 +;; inv1: tab_base, tab_ix, norm32 +;; inv2: norm32, inv1, i92 +(define_insn_and_split "divsi_inv_m1_3" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 
"arith_reg_or_0_operand" "rN") + (unspec:SI [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "register_operand" "r") + (match_operand:SI 4 "register_operand" "r")] + UNSPEC_DIV_INV_M1) + (unspec:SI [(match_dup 4) + (unspec:SI [(match_dup 2) + (match_dup 3) + (match_dup 4)] UNSPEC_DIV_INV_M1) + (match_operand:SI 5 "" "")] + UNSPEC_DIV_INV_M2) + (match_operand:DI 6 "register_operand" "r") + (match_operand:DI 7 "arith_reg_or_0_operand" "rN") + (match_operand:DI 8 "arith_reg_or_0_operand" "rN")] + UNSPEC_DIV_INV_M3)) + (clobber (match_operand:DI 9 "register_operand" "=r")) + (clobber (match_operand:DI 10 "register_operand" "=r")) + (clobber (match_operand:DI 11 "register_operand" "=r")) + (clobber (match_operand:DI 12 "register_operand" "=r")) + (clobber (match_operand:SI 13 "register_operand" "=r")) + (clobber (match_operand:SI 14 "register_operand" "=r")) + (clobber (match_operand:DI 15 "register_operand" "=r"))] + "TARGET_SHMEDIA + && (TARGET_DIVIDE_INV_MINLAT + || TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)" + "#" + "&& !can_create_pseudo_p ()" + [(pc)] +{ + rtx result = operands[0]; + rtx dividend = operands[1]; + rtx tab_base = operands[2]; + rtx tab_ix = operands[3]; + rtx norm32 = operands[4]; + /* rtx i92 = operands[5]; */ + rtx shift = operands[6]; + rtx i2p27 = operands[7]; + rtx i43 = operands[8]; + rtx scratch0 = operands[9]; + rtx scratch0_si = gen_lowpart (SImode, scratch0); + rtx scratch1 = operands[10]; + rtx scratch1_si = gen_lowpart (SImode, scratch1); + rtx scratch2 = operands[11]; + rtx scratch3 = operands[12]; + rtx scratch4 = operands[13]; + rtx scratch4_di = simplify_gen_subreg (DImode, scratch4, SImode, 0); + rtx scratch5 = operands[14]; + rtx scratch5_di = simplify_gen_subreg (DImode, scratch5, SImode, 0); + rtx scratch6 = operands[15]; + + emit_insn (gen_divsi_inv_m0 (scratch4, tab_base, tab_ix, norm32, + scratch0, scratch1)); + /* inv0 == scratch4 */ + if (! TARGET_DIVIDE_INV20U) + { + emit_insn (gen_mulsidi3_media (scratch0, scratch4, scratch4)); + i2p27 = scratch0; + emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch0_si)); + } + else + { + emit_insn (gen_mulsidi3_media (scratch1, scratch4, scratch4)); + emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si)); + } + emit_insn (gen_ashldi3_media (scratch2, scratch4_di, GEN_INT (45))); + emit_insn (gen_subdi3 (scratch1, scratch2, scratch1)); + emit_insn (gen_ashrdisi3_media_opaque (scratch4, scratch1, GEN_INT (28))); + /* inv1 == scratch4 */ + + if (TARGET_DIVIDE_INV_MINLAT) + { + emit_insn (gen_mulsidi3_media (scratch1, scratch4, norm32)); + emit_insn (gen_mulsidi3_media (scratch2, dividend, scratch4)); + emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (16))); + emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch4)); + emit_insn (gen_ashrdi3_media (scratch3, scratch2, GEN_INT (63))); + emit_insn (gen_ashrsi3_media (scratch5, dividend, GEN_INT (14))); + emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (30))); + emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch5)); + emit_insn (gen_xordi3 (scratch0, scratch3, i2p27)); + emit_insn (gen_adddi3 (scratch2, scratch2, scratch0)); + emit_insn (gen_subdi3 (scratch2, scratch2, scratch1)); + } + else + { + rtx label = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ()); + /* Use separate scratch regs for nsb and sign to allow scheduling. 
*/ + emit_insn (gen_nsbdi (scratch6, + simplify_gen_subreg (DImode, dividend, SImode, 0))); + emit_insn (gen_xorsi3 (scratch5, dividend, norm32)); + emit_insn (gen_ashrdi3_media (scratch3, scratch5_di, GEN_INT (63))); + emit_insn (gen_divsi_inv20 (scratch2, + norm32, scratch4, dividend, + scratch6, scratch3, i43, + /* scratch0 may be shared with i2p27. */ + scratch0, scratch1, scratch5, + label, label, i2p27)); + } + emit_insn (gen_ashrdi3_media (scratch2, scratch2, shift)); + emit_insn (gen_subdisi3_media (result, scratch2, scratch3)); + DONE; +}) + +(define_insn "divsi_inv20" + [(set (match_operand:DI 0 "register_operand" "=&r") + (unspec:DI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "register_operand" "r") + (match_operand:DI 4 "register_operand" "r") + (match_operand:DI 5 "register_operand" "r") + (match_operand:DI 6 "register_operand" "r") + (match_operand:DI 12 "register_operand" "r") + (match_operand 10 "target_operand" "b") + (match_operand 11 "immediate_operand" "i")] + UNSPEC_DIV_INV20)) + (clobber (match_operand:DI 7 "register_operand" "=&r")) + (clobber (match_operand:DI 8 "register_operand" "=&r")) + (clobber (match_operand:SI 9 "register_operand" "=r"))] + "TARGET_SHMEDIA + && (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)" +{ +/* operands: %0 div_result, %1 norm32, %2 inv1, %3 dividend, + %4 dividend_nsb, %5 result_sign, %6 i43, %12 i2p27, + %7 round_scratch, %8 scratch0 (di), %9 scratch1 (si) + %10 label (tr), %11 label (imm) + + muls.l inv1, norm32, scratch0 // s2.60 + muls.l inv1, dividend, result // s32.30 + xor i2p27, result_sign, round_scratch + bge/u dividend_nsb, i43, tr.. (label) + shari scratch0, 16, scratch0 // s-16.44 + muls.l sratch0_si, inv1, scratch0 // s-16.74 + sub result, round_scratch, result + shari dividend, 14, scratch1 // s19.-14 + shari scratch0, 30, scratch0 // s-16.44 + muls.l scratch0, scratch1, round_scratch // s15.30 +label: + sub result, round_scratch, result */ + + const bool likely = TARGET_DIVIDE_INV20L; + if (likely) + return + "muls.l %2, %3, %0" "\n" + " xor %12, %5, %7" "\n" + " bge/l %4, %6, %10" "\n" + " muls.l %2, %1, %8" "\n" + " shari %8, 16, %8" "\n" + " muls.l %8, %2, %8" "\n" + " shari %3, 14, %9" "\n" + " shari %8, 30, %8" "\n" + " muls.l %8, %9, %8" "\n" + " sub %0, %8, %0" "\n" + "%11: add %0, %7, %0"; + else + return + "muls.l %2, %1, %8" "\n" + " muls.l %2, %3, %0" "\n" + " xor %12, %5, %7" "\n" + " bge/u %4, %6, %10" "\n" + " shari %8, 16, %8" "\n" + " muls.l %8, %2, %8" "\n" + " sub %0, %7, %0" "\n" + " shari %3, 14, %9" "\n" + " shari %8, 30, %8" "\n" + " muls.l %8, %9, %7" "\n" + "%11: sub %0, %7, %0"; +}) + +(define_insn_and_split "divsi_inv_fp" + [(set (match_operand:SI 0 "general_movdst_operand" "=rf") + (div:SI (match_operand:SI 1 "general_movsrc_operand" "rf") + (match_operand:SI 2 "register_operand" "rf"))) + (use (match_operand:SI 3 "general_movsrc_operand" "r")) + (clobber (match_operand:SI 4 "register_operand" "=r")) + (clobber (match_operand:SI 5 "register_operand" "=r")) + (clobber (match_operand:DF 6 "register_operand" "=r")) + (clobber (match_operand:DF 7 "register_operand" "=r")) + (clobber (match_operand:DF 8 "register_operand" "=r"))] + "TARGET_SHMEDIA_FPU" + "#" + "&& (reload_in_progress || reload_completed)" + [(set (match_dup 0) (match_dup 3))] + "" + [(set_attr "highpart" "must_split")]) + +;; If a matching group of divide-by-inverse instructions is in the same +;; basic block after gcse & loop optimizations, we want to transform 
them +;; to a straight division using floating point for TARGET_DIVIDE_INV_FP. +(define_insn_and_split "*divsi_inv_fp_combine" + [(set (match_operand:SI 0 "register_operand" "=f") + (div:SI (match_operand:SI 1 "register_operand" "f") + (match_operand:SI 2 "register_operand" "f"))) + (use (unspec:SI [(match_dup 1) + (match_operand:SI 3 "" "") + (unspec:SI [(match_operand:SI 4 "" "") + (match_dup 3) + (match_operand:DI 5 "" "")] UNSPEC_DIV_INV_M2) + (match_operand:DI 6 "" "") + (const_int 0) + (const_int 0)] UNSPEC_DIV_INV_M3)) + (clobber (match_operand:SI 7 "fp_arith_reg_operand" "")) + (clobber (match_operand:SI 8 "fp_arith_reg_operand" "")) + (clobber (match_operand:DF 9 "fp_arith_reg_operand" "")) + (clobber (match_operand:DF 10 "fp_arith_reg_operand" "")) + (clobber (match_operand:DF 11 "fp_arith_reg_operand" ""))] + "TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV_FP && !can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 9) (float:DF (match_dup 1))) + (set (match_dup 10) (float:DF (match_dup 2))) + (set (match_dup 11) (div:DF (match_dup 9) (match_dup 10))) + (set (match_dup 8) + (fix:SI (match_dup 11))) + (set (match_dup 0) (match_dup 8))] +{ + if (! fp_arith_reg_operand (operands[1], SImode)) + { + emit_move_insn (operands[7], operands[1]); + operands[1] = operands[7]; + } + if (! fp_arith_reg_operand (operands[2], SImode)) + { + emit_move_insn (operands[8], operands[2]); + operands[2] = operands[8]; + } +} + [(set_attr "highpart" "must_split")]) + +;; ------------------------------------------------------------------------- +;; Multiplication instructions +;; ------------------------------------------------------------------------- + +(define_insn "umulhisi3_i" + [(set (reg:SI MACL_REG) + (mult:SI (zero_extend:SI + (match_operand:HI 0 "arith_reg_operand" "r")) + (zero_extend:SI + (match_operand:HI 1 "arith_reg_operand" "r"))))] + "TARGET_SH1" + "mulu.w %1,%0" + [(set_attr "type" "smpy")]) + +(define_insn "mulhisi3_i" + [(set (reg:SI MACL_REG) + (mult:SI (sign_extend:SI + (match_operand:HI 0 "arith_reg_operand" "r")) + (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "r"))))] + "TARGET_SH1" + "muls.w %1,%0" + [(set_attr "type" "smpy")]) + +(define_expand "mulhisi3" + [(set (reg:SI MACL_REG) + (mult:SI (sign_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (sign_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACL_REG))] + "TARGET_SH1" +{ + rtx insn, macl; + + macl = gen_rtx_REG (SImode, MACL_REG); + start_sequence (); + emit_insn (gen_mulhisi3_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* expand_binop can't find a suitable code in umul_widen_optab to + make a REG_EQUAL note from, so make one here. + See also smulsi3_highpart. + ??? Alternatively, we could put this at the calling site of expand_binop, + i.e. expand_expr. */ + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. 
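+ The equivalence recorded here is simply the widening multiply taken + from the single_set of the insn emitted above, i.e. + (mult:SI (sign_extend:SI op1) (sign_extend:SI op2)).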
*/ + emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn))); + + DONE; +}) + +(define_expand "umulhisi3" + [(set (reg:SI MACL_REG) + (mult:SI (zero_extend:SI + (match_operand:HI 1 "arith_reg_operand" "")) + (zero_extend:SI + (match_operand:HI 2 "arith_reg_operand" "")))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACL_REG))] + "TARGET_SH1" +{ + rtx insn, macl; + + macl = gen_rtx_REG (SImode, MACL_REG); + start_sequence (); + emit_insn (gen_umulhisi3_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* expand_binop can't find a suitable code in umul_widen_optab to + make a REG_EQUAL note from, so make one here. + See also smulsi3_highpart. + ??? Alternatively, we could put this at the calling site of expand_binop, + i.e. expand_expr. */ + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. */ + emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn))); + + DONE; +}) + +;; mulsi3 on the SH2 can be done in one instruction, on the SH1 we generate +;; a call to a routine which clobbers known registers. +(define_insn "" + [(set (match_operand:SI 1 "register_operand" "=z") + (mult:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (clobber (reg:SI MACL_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R1_REG)) + (use (match_operand:SI 0 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "mulsi3_call" + [(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" "")) + (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" "")) + (parallel[(set (match_operand:SI 0 "register_operand" "") + (mult:SI (reg:SI R4_REG) + (reg:SI R5_REG))) + (clobber (reg:SI MACL_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R3_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R1_REG)) + (use (match_operand:SI 3 "register_operand" ""))])] + "TARGET_SH1" + "") + +(define_insn "mul_r" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (mult:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "arith_reg_operand" "z")))] + "TARGET_SH2A" + "mulr %2,%0" + [(set_attr "type" "dmpy")]) + +(define_insn "mul_l" + [(set (reg:SI MACL_REG) + (mult:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH2" + "mul.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "mulsi3" + [(set (reg:SI MACL_REG) + (mult:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACL_REG))] + "TARGET_SH1" +{ + if (!TARGET_SH2) + { + /* The address must be set outside the libcall, + since it goes into a pseudo. */ + rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC); + rtx addr = force_reg (SImode, sym); + rtx insns = gen_mulsi3_call (operands[0], operands[1], + operands[2], addr); + emit_insn (insns); + } + else + { + rtx macl = gen_rtx_REG (SImode, MACL_REG); + + emit_insn (gen_mul_l (operands[1], operands[2])); + /* consec_sets_giv can only recognize the first insn that sets a + giv as the giv insn. So we must tag this also with a REG_EQUAL + note. 
*/ + emit_insn (gen_movsi_i ((operands[0]), macl)); + } + DONE; +}) + +(define_insn "mulsidi3_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI MACL_REG) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "mulsidi3" + [(set (match_operand:DI 0 "arith_reg_dest" "") + (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))] + "TARGET_SH2 || TARGET_SHMEDIA" +{ + if (TARGET_SH2) + { + emit_insn (gen_mulsidi3_compact (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "mulsidi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI (sign_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r")) + (sign_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "muls.l %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "ignore")]) + +(define_insn_and_split "mulsidi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "#" + "&& 1" + [(const_int 0)] +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_mulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG)); + /* We need something to tag the possible REG_EQUAL notes on to. 
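+ The dummy self-move of operands[0] emitted just below gives later + passes a single insn to which such a note, describing the full DImode + product, can be attached.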
*/ + emit_move_insn (operands[0], operands[0]); + DONE; +}) + +(define_insn "umulsidi3_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (set (reg:SI MACL_REG) + (mult:SI (match_dup 0) + (match_dup 1)))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "umulsidi3" + [(set (match_operand:DI 0 "arith_reg_dest" "") + (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))] + "TARGET_SH2 || TARGET_SHMEDIA" +{ + if (TARGET_SH2) + { + emit_insn (gen_umulsidi3_compact (operands[0], operands[1], operands[2])); + DONE; + } +}) + +(define_insn "umulsidi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r")) + (zero_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "mulu.l %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "ignore")]) + +(define_insn_and_split "umulsidi3_compact" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r")))) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "#" + "&& 1" + [(const_int 0)] +{ + rtx low_dst = gen_lowpart (SImode, operands[0]); + rtx high_dst = gen_highpart (SImode, operands[0]); + + emit_insn (gen_umulsidi3_i (operands[1], operands[2])); + + emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG)); + emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG)); + /* We need something to tag the possible REG_EQUAL notes on to. */ + emit_move_insn (operands[0], operands[0]); + DONE; +}) + +(define_insn "smulsi3_highpart_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "dmuls.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "smulsi3_highpart" + [(parallel + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACH_REG))] + "TARGET_SH2" +{ + rtx insn, mach; + + mach = gen_rtx_REG (SImode, MACH_REG); + start_sequence (); + emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* expand_binop can't find a suitable code in mul_highpart_optab to + make a REG_EQUAL note from, so make one here. + See also {,u}mulhisi. + ??? Alternatively, we could put this at the calling site of expand_binop, + i.e. expand_mult_highpart. */ + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. 
*/ + emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn))); + + DONE; +}) + +(define_insn "umulsi3_highpart_i" + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r")) + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))] + "TARGET_SH2" + "dmulu.l %1,%0" + [(set_attr "type" "dmpy")]) + +(define_expand "umulsi3_highpart" + [(parallel + [(set (reg:SI MACH_REG) + (truncate:SI + (lshiftrt:DI + (mult:DI + (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "")) + (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))) + (const_int 32)))) + (clobber (reg:SI MACL_REG))]) + (set (match_operand:SI 0 "arith_reg_operand" "") + (reg:SI MACH_REG))] + "TARGET_SH2" +{ + rtx insn, mach; + + mach = gen_rtx_REG (SImode, MACH_REG); + start_sequence (); + emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2])); + insn = get_insns (); + end_sequence (); + /* Use emit_libcall_block for loop invariant code motion and to make + a REG_EQUAL note. */ + emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn))); + + DONE; +}) + +(define_insn_and_split "muldi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (mult:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r"))) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (match_scratch:DI 4 "=r"))] + "TARGET_SHMEDIA" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx op3_v2si, op2_v2si; + + op3_v2si = operands[3]; + if (GET_CODE (op3_v2si) == SIGN_EXTEND) + { + op3_v2si = XEXP (op3_v2si, 0); + op3_v2si = simplify_gen_subreg (DImode, op3_v2si, GET_MODE (op3_v2si), 0); + } + op3_v2si = simplify_gen_subreg (V2SImode, op3_v2si, DImode, 0); + op2_v2si = operands[2]; + if (GET_CODE (op2_v2si) == SIGN_EXTEND) + { + op2_v2si = XEXP (op2_v2si, 0); + op2_v2si = simplify_gen_subreg (DImode, op2_v2si, GET_MODE (op2_v2si), 0); + } + op2_v2si = simplify_gen_subreg (V2SImode, op2_v2si, DImode, 0); + emit_insn (gen_rotldi3 (operands[3], operands[1], GEN_INT (32))); + emit_insn (gen_mulv2si3 (op3_v2si, op3_v2si, op2_v2si)); + emit_insn (gen_umulsidi3_media (operands[4], + sh_gen_truncate (SImode, operands[1], 0), + sh_gen_truncate (SImode, operands[2], 0))); + emit_insn (gen_anddi3 (operands[0], operands[3], GEN_INT (0xffffffff00000000LL))); + emit_insn (gen_ashldi3_media (operands[3], operands[3], GEN_INT (32))); + emit_insn (gen_adddi3 (operands[0], operands[3], operands[0])); + emit_insn (gen_adddi3 (operands[0], operands[4], operands[0])); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Logical operations +;; ------------------------------------------------------------------------- + +(define_expand "andsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (and:SI (match_operand:SI 1 "logical_reg_operand" "") + (match_operand:SI 2 "logical_and_operand" "")))] + "" +{ + /* If it is possible to turn the and insn into a zero extension + already, redundant zero extensions will be folded, which results + in better code. + Ideally the splitter of *andsi_compact would be enough, if redundant + zero extensions were detected after the combine pass, which does not + happen at the moment. 
*/ + if (TARGET_SH1) + { + if (satisfies_constraint_Jmb (operands[2])) + { + emit_insn (gen_zero_extendqisi2 (operands[0], + gen_lowpart (QImode, operands[1]))); + DONE; + } + else if (satisfies_constraint_Jmw (operands[2])) + { + emit_insn (gen_zero_extendhisi2 (operands[0], + gen_lowpart (HImode, operands[1]))); + DONE; + } + } +}) + +(define_insn_and_split "*andsi_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r,z,r") + (and:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,0,0") + (match_operand:SI 2 "logical_and_operand" "Jmb,Jmw,K08,r")))] + "TARGET_SH1" + "@ + extu.b %1,%0 + extu.w %1,%0 + and %2,%0 + and %2,%0" + "&& 1" + [(set (match_dup 0) (zero_extend:SI (match_dup 1)))] +{ + if (satisfies_constraint_Jmb (operands[2])) + operands[1] = gen_lowpart (QImode, operands[1]); + else if (satisfies_constraint_Jmw (operands[2])) + operands[1] = gen_lowpart (HImode, operands[1]); + else + FAIL; +} + [(set_attr "type" "arith")]) + +(define_insn "*andsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (and:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + and %1, %2, %0 + andi %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*andsi3_bclr" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (and:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "const_int_operand" "Psz")))] + "TARGET_SH2A && satisfies_constraint_Psz (operands[2])" + "bclr %W2,%0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "anddi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r,r") + (and:DI (match_operand:DI 1 "arith_reg_operand" "%r,r,r") + (match_operand:DI 2 "and_operand" "r,I10,J16")))] + "TARGET_SHMEDIA" + "@ + and %1, %2, %0 + andi %1, %2, %0 + #" + "reload_completed + && ! 
logical_operand (operands[2], DImode)" + [(const_int 0)] +{ + if ((unsigned)INTVAL (operands[2]) == (unsigned) 0xffffffff) + emit_insn (gen_mshflo_l_di (operands[0], operands[1], CONST0_RTX (DImode))); + else + emit_insn (gen_mshfhi_l_di (operands[0], CONST0_RTX (DImode), operands[1])); + DONE; +} + [(set_attr "type" "arith_media")]) + +(define_insn "andcsi3" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (and:SI (match_operand:SI 1 "arith_reg_operand" "r") + (not:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "andc %1,%2,%0" + [(set_attr "type" "arith_media")]) + +(define_insn "andcdi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (and:DI (match_operand:DI 1 "arith_reg_operand" "r") + (not:DI (match_operand:DI 2 "arith_reg_operand" "r"))))] + "TARGET_SHMEDIA" + "andc %1,%2,%0" + [(set_attr "type" "arith_media")]) + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (ior:SI (match_operand:SI 1 "logical_reg_operand" "") + (match_operand:SI 2 "logical_operand" "")))] + "" + "") + +(define_insn "*iorsi3_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,z") + (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "r,K08")))] + "TARGET_SH1 + && !(TARGET_SH2A && satisfies_constraint_Pso (operands[2]))" + "or %2,%0" + [(set_attr "type" "arith")]) + +(define_insn "*iorsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (ior:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + or %1, %2, %0 + ori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*iorsi3_bset" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0") + (match_operand:SI 2 "const_int_operand" "Pso")))] + "TARGET_SH2A && satisfies_constraint_Pso (operands[2])" + "bset %V2,%0" + [(set_attr "type" "arith")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (ior:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "logical_operand" "r,I10")))] + "TARGET_SHMEDIA" + "@ + or %1, %2, %0 + ori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn_and_split "*logical_sidi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (sign_extend:DI (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")])))] + "TARGET_SHMEDIA" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 3))] +{ + operands[3] + = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode, + simplify_gen_subreg (DImode, operands[1], SImode, 0), + simplify_gen_subreg (DImode, operands[2], SImode, 0)); +}) + +(define_insn_and_split "*logical_sidisi3" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (truncate:SI (sign_extend:DI + (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")]))))] + "TARGET_SHMEDIA" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 3))]) + +(define_insn_and_split "*logical_sidi3_2" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (sign_extend:DI (truncate:SI (sign_extend:DI + (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "logical_operand" "r,I10")])))))] + "TARGET_SHMEDIA" + "#" + "&& 1" + [(set (match_dup 0) (sign_extend:DI (match_dup 3)))]) + +(define_expand "xorsi3" + [(set 
(match_operand:SI 0 "arith_reg_operand" "") + (xor:SI (match_operand:SI 1 "logical_reg_operand" "") + (match_operand:SI 2 "xor_operand" "")))] + "" + "") + +(define_insn "*xorsi3_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=z,r") + (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0") + (match_operand:SI 2 "logical_operand" "K08,r")))] + "TARGET_SH1" + "xor %2,%0" + [(set_attr "type" "arith")]) + +;; The *logical_op_t pattern helps combine eliminating sign/zero extensions +;; of results where one of the inputs is a T bit store. Notice that this +;; pattern must not match during reload. If reload picks this pattern it +;; will be impossible to split it afterwards. +(define_insn_and_split "*logical_op_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operator:SI 3 "logical_operator" + [(match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "t_reg_operand")]))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 4) (reg:SI T_REG)) + (set (match_dup 0) (match_dup 3))] +{ + operands[4] = gen_reg_rtx (SImode); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode, + operands[1], operands[4]); +}) + +(define_insn "*xorsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (xor:SI (match_operand:SI 1 "logical_reg_operand" "%r,r") + (match_operand:SI 2 "xor_operand" "r,I06")))] + "TARGET_SHMEDIA" + "@ + xor %1, %2, %0 + xori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (xor:DI (match_operand:DI 1 "arith_reg_operand" "%r,r") + (match_operand:DI 2 "xor_operand" "r,I06")))] + "TARGET_SHMEDIA" + "@ + xor %1, %2, %0 + xori %1, %2, %0" + [(set_attr "type" "arith_media")]) + +;; Combiner bridge pattern for 2 * sign extend -> logical op -> truncate. +;; converts 2 * sign extend -> logical op into logical op -> sign extend +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (sign_extend:DI (match_operator 4 "binary_logical_operator" + [(match_operand 1 "any_register_operand" "") + (match_operand 2 "any_register_operand" "")])))] + "TARGET_SHMEDIA" + [(set (match_dup 5) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (match_dup 5)))] +{ + enum machine_mode inmode = GET_MODE (operands[1]); + int offset = 0; + + if (GET_CODE (operands[0]) == SUBREG) + { + offset = SUBREG_BYTE (operands[0]); + operands[0] = SUBREG_REG (operands[0]); + } + gcc_assert (REG_P (operands[0])); + if (TARGET_BIG_ENDIAN) + offset += 8 - GET_MODE_SIZE (inmode); + operands[5] = gen_rtx_SUBREG (inmode, operands[0], offset); +}) + +;; ------------------------------------------------------------------------- +;; Shifts and rotates +;; ------------------------------------------------------------------------- + +(define_expand "rotldi3" + [(set (match_operand:DI 0 "arith_reg_dest" "") + (rotate:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:HI 2 "mextr_bit_offset" "")))] + "TARGET_SHMEDIA" +{ + if (! 
mextr_bit_offset (operands[2], HImode))
+ FAIL;
+})
+
+(define_insn "rotldi3_mextr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotate:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+{
+ static char templ[16];
+ sprintf (templ, "mextr%d %%1,%%1,%%0",
+ 8 - (int) (INTVAL (operands[2]) >> 3));
+ return templ;
+}
+ [(set_attr "type" "arith_media")])
+
+(define_expand "rotrdi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "mextr_bit_offset" "")))]
+ "TARGET_SHMEDIA"
+{
+ if (! mextr_bit_offset (operands[2], HImode))
+ FAIL;
+})
+
+(define_insn "rotrdi3_mextr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+{
+ static char templ[16];
+ sprintf (templ, "mextr%d %%1,%%1,%%0", (int) INTVAL (operands[2]) >> 3);
+ return templ;
+}
+ [(set_attr "type" "arith_media")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ior:DI (zero_extend:DI (mem:QI (match_operand 1
+ "ua_address_operand" "")))
+ (ashift:DI (match_operand:DI 2 "arith_reg_operand" "")
+ (const_int 8))))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "TARGET_SHMEDIA"
+ [(match_dup 4) (match_dup 5)]
+{
+ operands[4] = ((TARGET_LITTLE_ENDIAN ? gen_ldhi_q : gen_ldlo_q)
+ (operands[3], operands[1]));
+ operands[5] = gen_mextr_rl (operands[0], operands[3], operands[2],
+ GEN_INT (56), GEN_INT (8));
+})
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (rotatert:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1"
+{
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+ if (ival == 1)
+ {
+ emit_insn (gen_rotrsi3_1 (operands[0], operands[1]));
+ DONE;
+ }
+
+ FAIL;
+})
+
+(define_insn "rotrsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotatert:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (and:SI (match_dup 1) (const_int 1)))]
+ "TARGET_SH1"
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of rotr for combine.
+(define_insn "*rotrsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotatert:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (lshiftrt:SI (match_dup 1) (const_int 31)))]
+ "TARGET_SH1"
+ "rotl %0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of rotl for combine.
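+;; As an illustrative sketch (variable names and types are arbitrary), a
+;; plain C rotate-left-by-one such as
+;;   unsigned int r = (x << 1) | (x >> 31);
+;; may be recognized as (rotate:SI x (const_int 1)).  When the T bit result
+;; is unused, the variant below with only the T_REG clobber is the one that
+;; is intended to match.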
+(define_insn "*rotlsi3_1" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "rotl %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_31" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "rotr %0" + [(set_attr "type" "arith")]) + +(define_insn "rotlsi3_16" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "TARGET_SH1" + "swap.w %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlsi3" + [(set (match_operand:SI 0 "arith_reg_dest") + (rotate:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_SH1" +{ + static const char rot_tab[] = { + 000, 000, 000, 000, 000, 000, 010, 001, + 001, 001, 011, 013, 003, 003, 003, 003, + 003, 003, 003, 003, 003, 013, 012, 002, + 002, 002, 010, 000, 000, 000, 000, 000, + }; + + int count = INTVAL (operands[2]); + int choice = rot_tab[count]; + if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1) + FAIL; + choice &= 7; + switch (choice) + { + case 0: + emit_move_insn (operands[0], operands[1]); + count -= (count & 16) * 2; + break; + case 3: + emit_insn (gen_rotlsi3_16 (operands[0], operands[1])); + count -= 16; + break; + case 1: + case 2: + { + rtx parts[2]; + parts[0] = gen_reg_rtx (SImode); + parts[1] = gen_reg_rtx (SImode); + emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1])); + emit_move_insn (parts[choice-1], operands[1]); + emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8))); + emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8))); + emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1])); + count = (count & ~16) - 8; + } + } + + for (; count > 0; count--) + emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); + for (; count < 0; count++) + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + + DONE; +}) + +(define_insn "rotlhi3_8" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r") + (const_int 8)))] + "TARGET_SH1" + "swap.b %1,%0" + [(set_attr "type" "arith")]) + +(define_expand "rotlhi3" + [(set (match_operand:HI 0 "arith_reg_operand") + (rotate:HI (match_operand:HI 1 "arith_reg_operand") + (match_operand:HI 2 "const_int_operand")))] + "TARGET_SH1" +{ + if (INTVAL (operands[2]) != 8) + FAIL; +}) + +;; The rotcr and rotcl insns are used primarily in DImode shifts by one. +;; They can also be used to implement things like +;; bool t = a == b; +;; int x0 = (y >> 1) | (t << 31); // rotcr +;; int x1 = (y << 1) | t; // rotcl +(define_insn "rotcr" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1)) + (ashift:SI (match_operand:SI 2 "t_reg_operand") + (const_int 31)))) + (set (reg:SI T_REG) + (and:SI (match_dup 1) (const_int 1)))] + "TARGET_SH1" + "rotcr %0" + [(set_attr "type" "arith")]) + +(define_insn "rotcl" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1)) + (match_operand:SI 2 "t_reg_operand"))) + (set (reg:SI T_REG) + (lshiftrt:SI (match_dup 1) (const_int 31)))] + "TARGET_SH1" + "rotcl %0" + [(set_attr "type" "arith")]) + +;; Simplified rotcr version for combine, which allows arbitrary shift +;; amounts for the reg. 
If the shift amount is '1' rotcr can be used +;; directly. Otherwise we have to insert a shift in between. +(define_insn_and_split "*rotcr" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (ashift:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand") + (const_int 31)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (INTVAL (operands[2]) > 1) + { + const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1); + rtx prev_set_t_insn = NULL_RTX; + rtx tmp_t_reg = NULL_RTX; + + /* If we're going to emit a shift sequence that clobbers the T_REG, + try to find the previous insn that sets the T_REG and emit the + shift insn before that insn, to remove the T_REG dependency. + If the insn that sets the T_REG cannot be found, store the T_REG + in a temporary reg and restore it after the shift. */ + if (sh_lshrsi_clobbers_t_reg_p (shift_count) + && ! sh_dynamicalize_shift_p (shift_count)) + { + prev_set_t_insn = prev_nonnote_insn_bb (curr_insn); + + /* Skip the nott insn, which was probably inserted by the splitter + of *rotcr_neg_t. Don't use one of the recog functions + here during insn splitting, since that causes problems in later + passes. */ + if (prev_set_t_insn != NULL_RTX) + { + rtx pat = PATTERN (prev_set_t_insn); + if (GET_CODE (pat) == SET + && t_reg_operand (XEXP (pat, 0), SImode) + && negt_reg_operand (XEXP (pat, 1), SImode)) + prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn); + } + + if (! (prev_set_t_insn != NULL_RTX + && reg_set_p (get_t_reg_rtx (), prev_set_t_insn) + && ! reg_referenced_p (get_t_reg_rtx (), + PATTERN (prev_set_t_insn)))) + { + prev_set_t_insn = NULL_RTX; + tmp_t_reg = gen_reg_rtx (SImode); + emit_insn (gen_move_insn (tmp_t_reg, get_t_reg_rtx ())); + } + } + + rtx shift_result = gen_reg_rtx (SImode); + rtx shift_insn = gen_lshrsi3 (shift_result, operands[1], shift_count); + operands[1] = shift_result; + + /* Emit the shift insn before the insn that sets T_REG, if possible. */ + if (prev_set_t_insn != NULL_RTX) + emit_insn_before (shift_insn, prev_set_t_insn); + else + emit_insn (shift_insn); + + /* Restore T_REG if it has been saved before. */ + if (tmp_t_reg != NULL_RTX) + emit_insn (gen_cmpgtsi_t (tmp_t_reg, const0_rtx)); + } + + /* For the rotcr insn to work, operands[3] must be in T_REG. + If it is not we can get it there by shifting it right one bit. + In this case T_REG is not an input for this insn, thus we don't have to + pay attention as of where to insert the shlr insn. */ + if (! t_reg_operand (operands[3], SImode)) + { + /* We don't care about the shifted result here, only the T_REG. */ + emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[3])); + operands[3] = get_t_reg_rtx (); + } + + emit_insn (gen_rotcr (operands[0], operands[1], operands[3])); + DONE; +}) + +;; If combine tries the same as above but with swapped operands, split +;; it so that it will try the pattern above. 
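+;; Illustrative sketch (the shift count and names are arbitrary): from C
+;; source along the lines of
+;;   unsigned int r = (t << 31) | (y >> 3);   /* t known to be 0 or 1 */
+;; combine may present the IOR with the ASHIFT term first.  The split below
+;; only swaps the operands back so that the *rotcr pattern above gets a
+;; chance to match.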
+(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand") + (const_int 31)) + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand"))))] + "TARGET_SH1 && can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 2) (match_dup 3)) + (ashift:SI (match_dup 1) (const_int 31)))) + (clobber (reg:SI T_REG))])]) + +;; Basically the same as the rotcr pattern above, but for rotcl. +;; FIXME: Fold copy pasted split code for rotcr and rotcl. +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (and:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand") + (const_int 1)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + gcc_assert (INTVAL (operands[2]) > 0); + + if (INTVAL (operands[2]) > 1) + { + const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1); + rtx prev_set_t_insn = NULL_RTX; + rtx tmp_t_reg = NULL_RTX; + + /* If we're going to emit a shift sequence that clobbers the T_REG, + try to find the previous insn that sets the T_REG and emit the + shift insn before that insn, to remove the T_REG dependency. + If the insn that sets the T_REG cannot be found, store the T_REG + in a temporary reg and restore it after the shift. */ + if (sh_ashlsi_clobbers_t_reg_p (shift_count) + && ! sh_dynamicalize_shift_p (shift_count)) + { + prev_set_t_insn = prev_nonnote_insn_bb (curr_insn); + + /* Skip the nott insn, which was probably inserted by the splitter + of *rotcl_neg_t. Don't use one of the recog functions + here during insn splitting, since that causes problems in later + passes. */ + if (prev_set_t_insn != NULL_RTX) + { + rtx pat = PATTERN (prev_set_t_insn); + if (GET_CODE (pat) == SET + && t_reg_operand (XEXP (pat, 0), SImode) + && negt_reg_operand (XEXP (pat, 1), SImode)) + prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn); + } + + if (! (prev_set_t_insn != NULL_RTX + && reg_set_p (get_t_reg_rtx (), prev_set_t_insn) + && ! reg_referenced_p (get_t_reg_rtx (), + PATTERN (prev_set_t_insn)))) + { + prev_set_t_insn = NULL_RTX; + tmp_t_reg = gen_reg_rtx (SImode); + emit_insn (gen_move_insn (tmp_t_reg, get_t_reg_rtx ())); + } + } + + rtx shift_result = gen_reg_rtx (SImode); + rtx shift_insn = gen_ashlsi3 (shift_result, operands[1], shift_count); + operands[1] = shift_result; + + /* Emit the shift insn before the insn that sets T_REG, if possible. */ + if (prev_set_t_insn != NULL_RTX) + emit_insn_before (shift_insn, prev_set_t_insn); + else + emit_insn (shift_insn); + + /* Restore T_REG if it has been saved before. */ + if (tmp_t_reg != NULL_RTX) + emit_insn (gen_cmpgtsi_t (tmp_t_reg, const0_rtx)); + } + + /* For the rotcl insn to work, operands[3] must be in T_REG. + If it is not we can get it there by shifting it right one bit. + In this case T_REG is not an input for this insn, thus we don't have to + pay attention as of where to insert the shlr insn. */ + if (! t_reg_operand (operands[3], SImode)) + { + /* We don't care about the shifted result here, only the T_REG. 
*/ + emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[3])); + operands[3] = get_t_reg_rtx (); + } + + emit_insn (gen_rotcl (operands[0], operands[1], operands[3])); + DONE; +}) + +;; rotcl combine pattern variations +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (match_operand:SI 3 "t_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (match_dup 3) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (and:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand") + (const_int 1)) + (ashift:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 2) (match_dup 3)) + (and:SI (match_dup 1) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand") + (const_int 31)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])] +{ + /* We don't care about the result of the left shift, only the T_REG. */ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3])); +}) + +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand") + (const_int 31)) + (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])] +{ + /* We don't care about the result of the left shift, only the T_REG. */ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3])); +}) + +;; rotcr combine bridge pattern which will make combine try out more +;; complex patterns. +(define_insn_and_split "*rotcr" + [(set (match_operand:SI 0 "arith_reg_dest") + (ashift:SI (match_operand:SI 1 "t_reg_operand") (const_int 31)))] + "TARGET_SH1" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 0) (const_int 1)) + (ashift:SI (match_dup 1) (const_int 31)))) + (set (reg:SI T_REG) + (and:SI (match_dup 0) (const_int 1)))])]) + +(define_insn_and_split "*rotcr" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand") + (const_int -2147483648)) ;; 0xffffffff80000000 + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (const_int 1)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_shll (tmp, operands[1])); + emit_insn (gen_rotcr (operands[0], operands[2], get_t_reg_rtx ())); + DONE; +}) + +;; rotcr combine patterns for rotating in the negated T_REG value. 
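+;; Illustrative sketch (an arbitrary example, not an exhaustive one): code
+;; of the form
+;;   bool t = a == b;
+;;   unsigned int r = (y >> 1) | ((!t) << 31);
+;; rotates the inverted T bit in from the left.  The patterns below handle
+;; this by emitting a nott first and then reusing the normal rotcr combine
+;; pattern.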
+(define_insn_and_split "*rotcr_neg_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (match_operand:SI 1 "negt_reg_shl31_operand") + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 2) (match_dup 3)) + (ashift:SI (reg:SI T_REG) (const_int 31)))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_nott (get_t_reg_rtx ())); +}) + +(define_insn_and_split "*rotcr_neg_t" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (match_operand:SI 3 "negt_reg_shl31_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (lshiftrt:SI (match_dup 1) (match_dup 2)) + (ashift:SI (reg:SI T_REG) (const_int 31)))) + (clobber (reg:SI T_REG))])] +{ + emit_insn (gen_nott (get_t_reg_rtx ())); +}) + +;; rotcl combine patterns for rotating in the negated T_REG value. +;; For some strange reason these have to be specified as splits which combine +;; will pick up. If they are specified as insn_and_split like the +;; *rotcr_neg_t patterns above, combine would recognize them successfully +;; but not emit them on non-SH2A targets. +(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (match_operand:SI 1 "negt_reg_operand") + (ashift:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand"))))] + "TARGET_SH1" + [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1))) + (parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 2) (match_dup 3)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand")) + (match_operand:SI 1 "negt_reg_operand")))] + "TARGET_SH1" + [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1))) + (parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 2) (match_dup 3)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +;; SImode shift left + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "arith_reg_operand" "") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "shift_count_operand" "")))] + "" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2])); + DONE; + } + if (TARGET_DYNSHIFT + && CONST_INT_P (operands[2]) && sh_dynamicalize_shift_p (operands[2])) + operands[2] = force_reg (SImode, operands[2]); + + /* If the ashlsi3_* insn is going to clobber the T_REG it must be + expanded here. */ + if (CONST_INT_P (operands[2]) + && sh_ashlsi_clobbers_t_reg_p (operands[2]) + && ! sh_dynamicalize_shift_p (operands[2])) + { + emit_insn (gen_ashlsi3_n_clobbers_t (operands[0], operands[1], + operands[2])); + DONE; + } + + /* Expand a library call for the dynamic shift. 
*/ + if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT) + { + emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]); + rtx funcaddr = gen_reg_rtx (Pmode); + function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC); + emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr)); + + DONE; + } +}) + +(define_insn "ashlsi3_k" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0") + (match_operand:SI 2 "p27_shift_count_operand" "M,P27")))] + "TARGET_SH1" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "ashlsi3_d" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "shift_count_operand" "r")))] + "TARGET_DYNSHIFT" + "shld %2,%0" + "&& CONST_INT_P (operands[2]) && ! sh_dynamicalize_shift_p (operands[2]) + && ! sh_ashlsi_clobbers_t_reg_p (operands[2])" + [(const_int 0)] +{ + if (satisfies_constraint_P27 (operands[2])) + { + emit_insn (gen_ashlsi3_k (operands[0], operands[1], operands[2])); + DONE; + } + else if (! satisfies_constraint_P27 (operands[2])) + { + /* This must happen before reload, otherwise the constant will be moved + into a register due to the "r" constraint, after which this split + cannot be done anymore. + Unfortunately the move insn will not always be eliminated. + Also, here we must not create a shift sequence that clobbers the + T_REG. */ + emit_move_insn (operands[0], operands[1]); + gen_shifty_op (ASHIFT, operands); + DONE; + } + + FAIL; +} + [(set_attr "type" "dyn_shift")]) + +;; If dynamic shifts are not available use a library function. +;; By specifying the pattern we reduce the number of call clobbered regs. +;; In order to make combine understand the truncation of the shift amount +;; operand we have to allow it to use pseudo regs for the shift operands. +(define_insn "ashlsi3_d_call" + [(set (match_operand:SI 0 "arith_reg_dest" "=z") + (ashift:SI (reg:SI R4_REG) + (and:SI (match_operand:SI 1 "arith_reg_operand" "z") + (const_int 31)))) + (use (match_operand:SI 2 "arith_reg_operand" "r")) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1 && !TARGET_DYNSHIFT" + "jsr @%2%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn_and_split "ashlsi3_n" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "not_p27_shift_count_operand" "")))] + "TARGET_SH1 && ! sh_ashlsi_clobbers_t_reg_p (operands[2])" + "#" + "&& (reload_completed + || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))" + [(const_int 0)] +{ + if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()) + { + /* If this pattern was picked and dynamic shifts are supported, switch + to dynamic shift pattern before reload. 
*/ + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2])); + } + else + gen_shifty_op (ASHIFT, operands); + + DONE; +}) + +(define_insn_and_split "ashlsi3_n_clobbers_t" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "not_p27_shift_count_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && sh_ashlsi_clobbers_t_reg_p (operands[2])" + "#" + "&& (reload_completed || INTVAL (operands[2]) == 31 + || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))" + [(const_int 0)] +{ + if (INTVAL (operands[2]) == 31) + { + /* If the shift amount is 31 we split into a different sequence before + reload so that it gets a chance to allocate R0 for the sequence. + If it fails to do so (due to pressure on R0), it will take one insn + more for the and. */ + emit_insn (gen_andsi3 (operands[0], operands[1], const1_rtx)); + emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); + } + else if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()) + { + /* If this pattern was picked and dynamic shifts are supported, switch + to dynamic shift pattern before reload. */ + operands[2] = force_reg (SImode, operands[2]); + emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2])); + } + else + gen_shifty_op (ASHIFT, operands); + + DONE; +}) + +(define_insn "shll" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1))) + (set (reg:SI T_REG) + (lt:SI (match_dup 1) (const_int 0)))] + "TARGET_SH1" + "shll %0" + [(set_attr "type" "arith")]) + +(define_insn "*ashlsi_c_void" + [(set (reg:SI T_REG) + (lt:SI (match_operand:SI 0 "arith_reg_operand" "r") (const_int 0))) + (clobber (match_scratch:SI 1 "=0"))] + "TARGET_SH1 && cse_not_expected" + "shll %0" + [(set_attr "type" "arith")]) + +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") (const_int 0)) + (set (reg:SI T_REG) + (gt:SI (match_dup 0) (match_operand:SI 1 "arith_reg_operand" "")))] + "TARGET_SH1 + && peep2_reg_dead_p (2, operands[0]) + && peep2_reg_dead_p (2, operands[1])" + [(const_int 0)] +{ + emit_insn (gen_shll (operands[1], operands[1])); + DONE; +}) + +(define_insn "ashlsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (ashift:SI (match_operand:SI 1 "extend_reg_operand" "r,r") + (match_operand:SI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA" + "@ + shlld.l %1, %2, %0 + shlli.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +;; HImode shift left + +(define_expand "ashlhi3" + [(parallel [(set (match_operand:HI 0 "arith_reg_operand" "") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI T_REG))])] + "TARGET_SH1" +{ + if (!CONST_INT_P (operands[2])) + FAIL; + /* It may be possible to call gen_ashlhi3 directly with more generic + operands. Make sure operands[1] is a HImode register here. 
*/ + if (!arith_reg_operand (operands[1], HImode)) + operands[1] = copy_to_mode_reg (HImode, operands[1]); +}) + +(define_insn "ashlhi3_k" + [(set (match_operand:HI 0 "arith_reg_dest" "=r,r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0") + (match_operand:HI 2 "const_int_operand" "M,P27")))] + "TARGET_SH1 && satisfies_constraint_P27 (operands[2])" + "@ + add %0,%0 + shll%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "*ashlhi3_n" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0") + (match_operand:HI 2 "const_int_operand" "n"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& reload_completed" + [(use (reg:SI R0_REG))] +{ + gen_shifty_hi_op (ASHIFT, operands); + DONE; +}) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +;; DImode shift left + +(define_expand "ashldi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI T_REG))])] + "" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2])); + DONE; + } + if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1) + { + emit_insn (gen_ashldi3_k (operands[0], operands[1])); + DONE; + } + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 32) + { + emit_insn (gen_ashldi3_std (operands[0], operands[1], operands[2])); + DONE; + } + else + FAIL; +}) + +;; Expander for DImode shift left with SImode operations. +(define_expand "ashldi3_std" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SH1 && INTVAL (operands[2]) < 32" +{ + rtx low_src = gen_lowpart (SImode, operands[1]); + rtx high_src = gen_highpart (SImode, operands[1]); + rtx dst = gen_reg_rtx (DImode); + rtx low_dst = gen_lowpart (SImode, dst); + rtx high_dst = gen_highpart (SImode, dst); + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + + emit_insn (gen_lshrsi3 (tmp0, low_src, GEN_INT (32 - INTVAL (operands[2])))); + emit_insn (gen_ashlsi3 (low_dst, low_src, operands[2])); + emit_insn (gen_ashlsi3 (tmp1, high_src, operands[2])); + emit_insn (gen_iorsi3 (high_dst, tmp0, tmp1)); + emit_move_insn (operands[0], dst); + DONE; +}) + +(define_insn_and_split "ashldi3_k" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx high = gen_highpart (SImode, operands[0]); + rtx low = gen_lowpart (SImode, operands[0]); + emit_insn (gen_shll (low, low)); + emit_insn (gen_rotcl (high, high, get_t_reg_rtx ())); + DONE; +}) + +(define_insn "ashldi3_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r,r") + (match_operand:DI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA" + "@ + shlld %1, %2, %0 + shlli %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*ashldisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0) + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SHMEDIA && INTVAL (operands[2]) < 32" + "shlli.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; . . . . . . . . . . . 
. . . . . . . . . . . . . . . . . . . . . . . . . . +;; SImode arithmetic shift right +;; +;; We can't do HImode right shifts correctly unless we start out with an +;; explicit zero / sign extension; doing that would result in worse overall +;; code, so just let the machine independent code widen the mode. +;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3 . + +(define_expand "ashrsi3" + [(parallel [(set (match_operand:SI 0 "arith_reg_dest" "") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "nonmemory_operand" ""))) + (clobber (reg:SI T_REG))])] + "" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2])); + DONE; + } + if (expand_ashiftrt (operands)) + DONE; + else + FAIL; +}) + +(define_insn "shar" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (set (reg:SI T_REG) + (and:SI (match_dup 1) (const_int 1)))] + "TARGET_SH1" + "shar %0" + [(set_attr "type" "arith")]) + +(define_insn "ashrsi3_k" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "const_int_operand" "M"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && INTVAL (operands[2]) == 1" + "shar %0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "ashrsi2_16" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)))] + "TARGET_SH1" + "#" + "&& 1" + [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16))) + (set (match_dup 0) (sign_extend:SI (match_dup 2)))] +{ + operands[2] = gen_lowpart (HImode, operands[0]); +}) + +(define_insn_and_split "ashrsi2_31" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 31))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(const_int 0)] +{ + emit_insn (gen_shll (operands[0], operands[1])); + emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ())); + DONE; +}) + +(define_insn "ashrsi3_d" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))] + "TARGET_DYNSHIFT" + "shad %2,%0" + [(set_attr "type" "dyn_shift")]) + +(define_insn "ashrsi3_n" + [(set (reg:SI R4_REG) + (ashiftrt:SI (reg:SI R4_REG) + (match_operand:SI 0 "const_int_operand" "i"))) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "ashrsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (ashiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r") + (match_operand:SI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA" + "@ + shard.l %1, %2, %0 + shari.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
+;; DImode arithmetic shift right + +(define_expand "ashrdi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI T_REG))])] + "" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2])); + DONE; + } + if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 1) + FAIL; +}) + +(define_insn_and_split "ashrdi3_k" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx high = gen_highpart (SImode, operands[0]); + rtx low = gen_lowpart (SImode, operands[0]); + emit_insn (gen_shar (high, high)); + emit_insn (gen_rotcr (low, low, get_t_reg_rtx ())); + DONE; +}) + +(define_insn "ashrdi3_media" + [(set (match_operand:DI 0 "ext_dest_operand" "=r,r") + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r") + (match_operand:DI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA + && (arith_reg_dest (operands[0], DImode) + || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) >= 32))" + "@ + shard %1, %2, %0 + shari %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*ashrdisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0) + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SHMEDIA && INTVAL (operands[2]) < 32" + "shari.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "ashrdisi3_media_high" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (truncate:SI + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "const_int_operand" "n"))))] + "TARGET_SHMEDIA && INTVAL (operands[2]) >= 32" + "shari %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "ashrdisi3_media_opaque" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (unspec:SI [(match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "const_int_operand" "n")] + UNSPEC_ASHIFTRT))] + "TARGET_SHMEDIA" + "shari %1, %2, %0" + [(set_attr "type" "arith_media")]) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +;; SImode logical shift right + +(define_expand "lshrsi3" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "shift_count_operand" "")))] + "" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2])); + DONE; + } + + /* If a dynamic shift is supposed to be used, expand the lshrsi3_d insn + here, otherwise the pattern will never match due to the shift amount reg + negation. */ + if (TARGET_DYNSHIFT + && CONST_INT_P (operands[2]) && sh_dynamicalize_shift_p (operands[2])) + { + rtx neg_count = force_reg (SImode, + gen_int_mode (- INTVAL (operands[2]), SImode)); + emit_insn (gen_lshrsi3_d (operands[0], operands[1], neg_count)); + DONE; + } + + if (TARGET_DYNSHIFT && ! CONST_INT_P (operands[2])) + { + rtx neg_count = gen_reg_rtx (SImode); + emit_insn (gen_negsi2 (neg_count, operands[2])); + emit_insn (gen_lshrsi3_d (operands[0], operands[1], neg_count)); + DONE; + } + + /* If the lshrsi3_* insn is going to clobber the T_REG it must be + expanded here. */ + if (CONST_INT_P (operands[2]) + && sh_lshrsi_clobbers_t_reg_p (operands[2]) + && ! 
sh_dynamicalize_shift_p (operands[2])) + { + emit_insn (gen_lshrsi3_n_clobbers_t (operands[0], operands[1], + operands[2])); + DONE; + } + + /* Expand a library call for the dynamic shift. */ + if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT) + { + emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]); + rtx funcaddr = gen_reg_rtx (Pmode); + function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC); + emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr)); + DONE; + } +}) + +(define_insn "lshrsi3_k" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "p27_rshift_count_operand" "P27")))] + "TARGET_SH1" + "shlr%O2 %0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "lshrsi3_d" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (neg:SI (match_operand:SI 2 "shift_count_operand" "r"))))] + "TARGET_DYNSHIFT" + "shld %2,%0" + "&& CONST_INT_P (operands[2]) && ! sh_dynamicalize_shift_p (operands[2]) + && ! sh_lshrsi_clobbers_t_reg_p (operands[2])" + [(const_int 0)] +{ + if (satisfies_constraint_P27 (operands[2])) + { + /* This will not be done for a shift amount of 1, because it would + clobber the T_REG. */ + emit_insn (gen_lshrsi3_k (operands[0], operands[1], operands[2])); + DONE; + } + else if (! satisfies_constraint_P27 (operands[2])) + { + /* This must happen before reload, otherwise the constant will be moved + into a register due to the "r" constraint, after which this split + cannot be done anymore. + Unfortunately the move insn will not always be eliminated. + Also, here we must not create a shift sequence that clobbers the + T_REG. */ + emit_move_insn (operands[0], operands[1]); + gen_shifty_op (LSHIFTRT, operands); + DONE; + } + + FAIL; +} + [(set_attr "type" "dyn_shift")]) + +;; If dynamic shifts are not available use a library function. +;; By specifying the pattern we reduce the number of call clobbered regs. +;; In order to make combine understand the truncation of the shift amount +;; operand we have to allow it to use pseudo regs for the shift operands. +(define_insn "lshrsi3_d_call" + [(set (match_operand:SI 0 "arith_reg_dest" "=z") + (lshiftrt:SI (reg:SI R4_REG) + (and:SI (match_operand:SI 1 "arith_reg_operand" "z") + (const_int 31)))) + (use (match_operand:SI 2 "arith_reg_operand" "r")) + (clobber (reg:SI T_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1 && !TARGET_DYNSHIFT" + "jsr @%2%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn_and_split "lshrsi3_n" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "not_p27_rshift_count_operand")))] + "TARGET_SH1 && ! sh_lshrsi_clobbers_t_reg_p (operands[2])" + "#" + "&& (reload_completed + || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))" + [(const_int 0)] +{ + if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()) + { + /* If this pattern was picked and dynamic shifts are supported, switch + to dynamic shift pattern before reload. */ + operands[2] = force_reg (SImode, + gen_int_mode (- INTVAL (operands[2]), SImode)); + emit_insn (gen_lshrsi3_d (operands[0], operands[1], operands[2])); + } + else + gen_shifty_op (LSHIFTRT, operands); + + DONE; +}) + +;; The lshrsi3_n_clobbers_t pattern also works as a simplified version of +;; the shlr pattern. 
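+;; Worked example (the shift count 31 is the interesting case): for
+;;   unsigned int r = x >> 31;
+;; the split below does not emit a long shift sequence; it emits shll,
+;; which copies bit 31 of x into the T bit, followed by movt, so the whole
+;; logical shift costs two insns.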
+(define_insn_and_split "lshrsi3_n_clobbers_t" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand:SI 2 "not_p27_rshift_count_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && sh_lshrsi_clobbers_t_reg_p (operands[2])" + "#" + "&& (reload_completed || INTVAL (operands[2]) == 31 + || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))" + [(const_int 0)] +{ + if (INTVAL (operands[2]) == 31) + { + emit_insn (gen_shll (operands[0], operands[1])); + emit_insn (gen_movt (operands[0], get_t_reg_rtx ())); + } + else if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()) + { + /* If this pattern was picked and dynamic shifts are supported, switch + to dynamic shift pattern before reload. */ + operands[2] = force_reg (SImode, + gen_int_mode (- INTVAL (operands[2]), SImode)); + emit_insn (gen_lshrsi3_d (operands[0], operands[1], operands[2])); + } + else + gen_shifty_op (LSHIFTRT, operands); + + DONE; +}) + +(define_insn "shlr" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1))) + (set (reg:SI T_REG) + (and:SI (match_dup 1) (const_int 1)))] + "TARGET_SH1" + "shlr %0" + [(set_attr "type" "arith")]) + +(define_insn "lshrsi3_media" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (lshiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r") + (match_operand:SI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA" + "@ + shlrd.l %1, %2, %0 + shlri.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . +;; DImode logical shift right + +(define_expand "lshrdi3" + [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "") + (match_operand:DI 2 "immediate_operand" ""))) + (clobber (reg:SI T_REG))])] + "" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2])); + DONE; + } + if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 1) + FAIL; +}) + +(define_insn_and_split "lshrdi3_k" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx high = gen_highpart (SImode, operands[0]); + rtx low = gen_lowpart (SImode, operands[0]); + emit_insn (gen_shlr (high, high)); + emit_insn (gen_rotcr (low, low, get_t_reg_rtx ())); + DONE; +}) + +(define_insn "lshrdi3_media" + [(set (match_operand:DI 0 "ext_dest_operand" "=r,r") + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r") + (match_operand:DI 2 "shift_count_operand" "r,n")))] + "TARGET_SHMEDIA + && (arith_reg_dest (operands[0], DImode) + || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > 32))" + "@ + shlrd %1, %2, %0 + shlri %1, %2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "*lshrdisi3_media" + [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0) + (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SHMEDIA && INTVAL (operands[2]) < 32" + "shlri.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
+;; Combined left/right shifts + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "")))] + "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32" + [(use (reg:SI R0_REG))] +{ + if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) + FAIL; + DONE; +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32" + [(use (reg:SI R0_REG))] +{ + if (gen_shl_and (operands[0], operands[2], operands[3], operands[1])) + FAIL; + DONE; +}) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 1" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_and_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_and_length (insn)") (const_int 6)) + (const_string "12") + (eq (symbol_ref "shl_and_length (insn)") (const_int 7)) + (const_string "14") + (eq (symbol_ref "shl_and_length (insn)") (const_int 8)) + (const_string "16")] + (const_string "18"))) + (set_attr "type" "arith")]) + +(define_insn "" + [(set (match_operand:SI 0 "register_operand" "=z") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 2" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_length (insn)") (const_int 4)) + (const_string "8")] + (const_string "10"))) + (set_attr "type" "arith")]) + +;; shift left / and combination with a scratch register: The combine pass +;; does not accept the individual instructions, even though they are +;; cheap. But it needs a precise description so that it is usable after +;; reload. 
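+;; For illustration, a source expression of roughly the form
+;;	(((x >> 2) & y) << 3) >> 1
+;; (constant shift counts, register operand y) has the shape described by
+;; the patterns below; whether combine actually forms this RTL depends on
+;; the surrounding code.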
+(define_insn "and_shl_scratch" + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (lshiftrt:SI + (ashift:SI + (and:SI + (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0") + (match_operand:SI 2 "const_int_operand" "N,n")) + (match_operand:SI 3 "" "0,r")) + (match_operand:SI 4 "const_int_operand" "n,n")) + (match_operand:SI 5 "const_int_operand" "n,n"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5)) + (const_string "10")] + (const_string "12"))) + (set_attr "type" "arith")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (lshiftrt:SI + (ashift:SI + (and:SI + (lshiftrt:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "register_operand" "")) + (match_operand:SI 4 "const_int_operand" "")) + (match_operand:SI 5 "const_int_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(use (reg:SI R0_REG))] +{ + rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1]; + + if (INTVAL (operands[2])) + { + gen_shifty_op (LSHIFTRT, operands); + } + emit_insn (gen_andsi3 (operands[0], operands[0], and_source)); + operands[2] = operands[4]; + gen_shifty_op (ASHIFT, operands); + if (INTVAL (operands[5])) + { + operands[2] = operands[5]; + gen_shifty_op (LSHIFTRT, operands); + } + DONE; +}) + +;; signed left/right shift combination. +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI + (ashift:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "const_int_operand" "")) + (match_operand:SI 3 "const_int_operand" "") + (const_int 0))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + [(use (reg:SI R0_REG))] +{ + if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1])) + FAIL; + DONE; +}) + +(define_insn "shl_sext_ext" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI + (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && (unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5" + "#" + [(set (attr "length") + (cond [(match_test "shl_sext_length (insn)") + (const_string "2") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 2)) + (const_string "4") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 6)) + (const_string "12") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 7)) + (const_string "14") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 8)) + (const_string "16")] + (const_string "18"))) + (set_attr "type" "arith")]) + +(define_insn "shl_sext_sub" + [(set (match_operand:SI 0 "register_operand" "=z") + (sign_extract:SI + (ashift:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 2 "const_int_operand" "n")) + (match_operand:SI 3 "const_int_operand" "n") + (const_int 0))) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && (shl_sext_kind 
(operands[2], operands[3], 0) & ~1) == 6" + "#" + [(set (attr "length") + (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3)) + (const_string "6") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 4)) + (const_string "8") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 5)) + (const_string "10") + (eq (symbol_ref "shl_sext_length (insn)") (const_int 6)) + (const_string "12")] + (const_string "14"))) + (set_attr "type" "arith")]) + +;; The xtrct_left and xtrct_right patterns are used in expansions of DImode +;; shifts by 16, and allow the xtrct instruction to be generated from C +;; source. +(define_insn "xtrct_left" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 16)) + (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0") + (const_int 16))))] + "TARGET_SH1" + "xtrct %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "xtrct_right" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 16)) + (ashift:SI (match_operand:SI 2 "arith_reg_operand" "r") + (const_int 16))))] + "TARGET_SH1" + "xtrct %2,%0" + [(set_attr "type" "arith")]) + +;; ------------------------------------------------------------------------- +;; Unary arithmetic +;; ------------------------------------------------------------------------- + +(define_insn "negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (neg:SI (plus:SI (reg:SI T_REG) + (match_operand:SI 1 "arith_reg_operand" "r")))) + (set (reg:SI T_REG) + (ne:SI (ior:SI (reg:SI T_REG) (match_dup 1)) + (const_int 0)))] + "TARGET_SH1" + "negc %1,%0" + [(set_attr "type" "arith")]) + +;; A simplified version of the negc insn, where the exact value of the +;; T bit doesn't matter. This is easier for combine to pick up. +;; Notice that '0 - x - 1' is the same as '~x', thus we don't specify +;; extra patterns for this case. +(define_insn "*negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand:SI 2 "t_reg_operand" ""))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "negc %1,%0" + [(set_attr "type" "arith")]) + +(define_insn "*negdi_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "sub r63, %1, %0" + [(set_attr "type" "arith_media")]) + +;; Don't split into individual negc insns immediately so that neg:DI (abs:DI) +;; can be combined. 
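+;; For reference, when the *negdi2 pattern below is eventually split, the
+;; generated code is roughly the following (a sketch; the actual register
+;; assignment is up to the register allocator):
+;;	clrt			! clear the borrow bit
+;;	negc	r5,r1		! low word:  r1 = 0 - r5 - T, T = borrow
+;;	negc	r4,r0		! high word: r0 = 0 - r4 - T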
+(define_expand "negdi2" + [(parallel [(set (match_operand:DI 0 "arith_reg_dest") + (neg:DI (match_operand:DI 1 "arith_reg_operand"))) + (clobber (reg:SI T_REG))])] + "TARGET_SH1") + +(define_insn_and_split "*negdi2" + [(set (match_operand:DI 0 "arith_reg_dest") + (neg:DI (match_operand:DI 1 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + emit_insn (gen_clrt ()); + emit_insn (gen_negc (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]))); + emit_insn (gen_negc (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1]))); + DONE; +}) + +(define_insn "negsi2" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "neg %1,%0" + [(set_attr "type" "arith")]) + +(define_insn_and_split "one_cmplsi2" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (not:SI (match_operand:SI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "not %1,%0" + "&& can_create_pseudo_p ()" + [(set (reg:SI T_REG) (ge:SI (match_dup 1) (const_int 0))) + (set (match_dup 0) (reg:SI T_REG))] +{ +/* PR 54685 + If the result of 'unsigned int <= 0x7FFFFFFF' ends up as the following + sequence: + + (set (reg0) (not:SI (reg0) (reg1))) + (parallel [(set (reg2) (lshiftrt:SI (reg0) (const_int 31))) + (clobber (reg:SI T_REG))]) + + ... match and combine the sequence manually in the split pass after the + combine pass. Notice that combine does try the target pattern of this + split, but if the pattern is added it interferes with other patterns, in + particular with the div0s comparisons. + This could also be done with a peephole but doing it here before register + allocation can save one temporary. + When we're here, the not:SI pattern obviously has been matched already + and we only have to see whether the following insn is the left shift. 
*/ + + rtx i = next_nonnote_insn_bb (curr_insn); + if (i == NULL_RTX || !NONJUMP_INSN_P (i)) + FAIL; + + rtx p = PATTERN (i); + if (GET_CODE (p) != PARALLEL || XVECLEN (p, 0) != 2) + FAIL; + + rtx p0 = XVECEXP (p, 0, 0); + rtx p1 = XVECEXP (p, 0, 1); + + if (/* (set (reg2) (lshiftrt:SI (reg0) (const_int 31))) */ + GET_CODE (p0) == SET + && GET_CODE (XEXP (p0, 1)) == LSHIFTRT + && REG_P (XEXP (XEXP (p0, 1), 0)) + && REGNO (XEXP (XEXP (p0, 1), 0)) == REGNO (operands[0]) + && CONST_INT_P (XEXP (XEXP (p0, 1), 1)) + && INTVAL (XEXP (XEXP (p0, 1), 1)) == 31 + + /* (clobber (reg:SI T_REG)) */ + && GET_CODE (p1) == CLOBBER && REG_P (XEXP (p1, 0)) + && REGNO (XEXP (p1, 0)) == T_REG) + { + operands[0] = XEXP (p0, 0); + set_insn_deleted (i); + } + else + FAIL; +} + [(set_attr "type" "arith")]) + +(define_expand "one_cmpldi2" + [(set (match_operand:DI 0 "arith_reg_dest" "") + (xor:DI (match_operand:DI 1 "arith_reg_operand" "") + (const_int -1)))] + "TARGET_SHMEDIA" "") + +(define_expand "abs<mode>2" + [(parallel [(set (match_operand:SIDI 0 "arith_reg_dest") + (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand"))) + (clobber (reg:SI T_REG))])] + "TARGET_SH1") + +(define_insn_and_split "*abs<mode>2" + [(set (match_operand:SIDI 0 "arith_reg_dest") + (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (<MODE>mode == SImode) + emit_insn (gen_cmpgesi_t (operands[1], const0_rtx)); + else + { + rtx high_src = gen_highpart (SImode, operands[1]); + emit_insn (gen_cmpgesi_t (high_src, const0_rtx)); + } + + emit_insn (gen_neg<mode>_cond (operands[0], operands[1], operands[1], + const1_rtx)); + DONE; +}) + +(define_insn_and_split "*negabs<mode>2" + [(set (match_operand:SIDI 0 "arith_reg_dest") + (neg:SIDI (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (<MODE>mode == SImode) + emit_insn (gen_cmpgesi_t (operands[1], const0_rtx)); + else + { + rtx high_src = gen_highpart (SImode, operands[1]); + emit_insn (gen_cmpgesi_t (high_src, const0_rtx)); + } + + emit_insn (gen_neg<mode>_cond (operands[0], operands[1], operands[1], + const0_rtx)); + DONE; +}) + +;; The SH4 202 can do zero-offset branches without pipeline stalls. +;; This can be used as some kind of conditional execution, which is useful +;; for abs. +;; Actually the instruction scheduling should decide whether to use a +;; zero-offset branch or not for any generic case involving a single +;; instruction on SH4 202. +(define_insn_and_split "negsi_cond" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (if_then_else + (eq:SI (reg:SI T_REG) (match_operand:SI 3 "const_int_operand" "M,N")) + (match_operand:SI 1 "arith_reg_operand" "0,0") + (neg:SI (match_operand:SI 2 "arith_reg_operand" "r,r"))))] + "TARGET_SH1 && TARGET_ZDCBRANCH" +{ + static const char* alt[] = + { + "bt 0f" "\n" + " neg %2,%0" "\n" + "0:", + + "bf 0f" "\n" + " neg %2,%0" "\n" + "0:" + }; + return alt[which_alternative]; +} + "TARGET_SH1 && ! TARGET_ZDCBRANCH" + [(const_int 0)] +{ + rtx skip_neg_label = gen_label_rtx (); + + emit_move_insn (operands[0], operands[1]); + + emit_jump_insn (INTVAL (operands[3]) + ? 
gen_branch_true (skip_neg_label) + : gen_branch_false (skip_neg_label)); + + emit_label_after (skip_neg_label, + emit_insn (gen_negsi2 (operands[0], operands[1]))); + DONE; +} + [(set_attr "type" "arith") ;; poor approximation + (set_attr "length" "4")]) + +(define_insn_and_split "negdi_cond" + [(set (match_operand:DI 0 "arith_reg_dest") + (if_then_else + (eq:SI (reg:SI T_REG) (match_operand:SI 3 "const_int_operand")) + (match_operand:DI 1 "arith_reg_operand") + (neg:DI (match_operand:DI 2 "arith_reg_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx skip_neg_label = gen_label_rtx (); + + emit_move_insn (operands[0], operands[1]); + + emit_jump_insn (INTVAL (operands[3]) + ? gen_branch_true (skip_neg_label) + : gen_branch_false (skip_neg_label)); + + if (!INTVAL (operands[3])) + emit_insn (gen_clrt ()); + + emit_insn (gen_negc (gen_lowpart (SImode, operands[0]), + gen_lowpart (SImode, operands[1]))); + emit_label_after (skip_neg_label, + emit_insn (gen_negc (gen_highpart (SImode, operands[0]), + gen_highpart (SImode, operands[1])))); + DONE; +}) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (bswap:SI (match_operand:SI 1 "arith_reg_operand" "")))] + "TARGET_SH1" +{ + if (! can_create_pseudo_p ()) + FAIL; + else + { + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + + emit_insn (gen_swapbsi2 (tmp0, operands[1])); + emit_insn (gen_rotlsi3_16 (tmp1, tmp0)); + emit_insn (gen_swapbsi2 (operands[0], tmp1)); + DONE; + } +}) + +(define_insn "swapbsi2" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 4294901760)) + (ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8)) + (const_int 65280)) + (and:SI (ashiftrt:SI (match_dup 1) (const_int 8)) + (const_int 255)))))] + "TARGET_SH1" + "swap.b %1,%0" + [(set_attr "type" "arith")]) + +;; The *swapbisi2_and_shl8 pattern helps the combine pass simplifying +;; partial byte swap expressions such as... +;; ((x & 0xFF) << 8) | ((x >> 8) & 0xFF). +;; ...which are currently not handled by the tree optimizers. +;; The combine pass will not initially try to combine the full expression, +;; but only some sub-expressions. In such a case the *swapbisi2_and_shl8 +;; pattern acts as an intermediate pattern that will eventually lead combine +;; to the swapbsi2 pattern above. +;; As a side effect this also improves code that does (x & 0xFF) << 8 +;; or (x << 8) & 0xFF00. +(define_insn_and_split "*swapbisi2_and_shl8" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 8)) + (const_int 65280)) + (match_operand:SI 2 "arith_reg_operand" "r")))] + "TARGET_SH1 && ! reload_in_progress && ! reload_completed" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendqisi2 (tmp0, gen_lowpart (QImode, operands[1]))); + emit_insn (gen_swapbsi2 (tmp1, tmp0)); + emit_insn (gen_iorsi3 (operands[0], tmp1, operands[2])); + DONE; +}) + +;; The *swapbhisi2 pattern is, like the *swapbisi2_and_shl8 pattern, another +;; intermediate pattern that will help the combine pass arriving at swapbsi2. 
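+;; As a rough example, a partial byte swap written as
+;;	((x << 8) & 0xFF00) | ((x >> 8) & 0xFF)
+;; tends to appear in RTL with the second term as a zero_extract of bits
+;; 8..15, which is the shape matched below; combine may of course reach
+;; this pattern through other intermediate forms as well.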
+(define_insn_and_split "*swapbhisi2" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r") + (const_int 8)) + (const_int 65280)) + (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))))] + "TARGET_SH1 && ! reload_in_progress && ! reload_completed" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extendhisi2 (tmp, gen_lowpart (HImode, operands[1]))); + emit_insn (gen_swapbsi2 (operands[0], tmp)); + DONE; +}) + +;; In some cases the swapbsi2 pattern might leave a sequence such as... +;; swap.b r4,r4 +;; mov r4,r0 +;; +;; which can be simplified to... +;; swap.b r4,r0 +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "") + (const_int 4294901760)) + (ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8)) + (const_int 65280)) + (and:SI (ashiftrt:SI (match_dup 1) (const_int 8)) + (const_int 255))))) + (set (match_operand:SI 2 "arith_reg_dest" "") + (match_dup 0))] + "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "") + (const_int 4294901760)) + (ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8)) + (const_int 65280)) + (and:SI (ashiftrt:SI (match_dup 1) (const_int 8)) + (const_int 255)))))]) + +;; ------------------------------------------------------------------------- +;; Zero extension instructions +;; ------------------------------------------------------------------------- + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "r")))] + "TARGET_SHMEDIA" + "addz.l %1, r63, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "extend")]) + +(define_insn "zero_extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.uw %m1, %0" + [(set_attr "type" "*,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48))) + (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] +{ + if (GET_CODE (operands[1]) == TRUNCATE) + operands[1] = XEXP (operands[1], 0); +}) + +;; ??? when a truncated input to a zero_extend is reloaded, reload will +;; reload the entire truncate expression. 
+(define_insn_and_split "*loaddi_trunc" + [(set (match_operand 0 "any_register_operand" "=r") + (truncate (match_operand:DI 1 "memory_operand" "m")))] + "TARGET_SHMEDIA && reload_completed" + "#" + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0])); +}) + +(define_insn "zero_extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + andi %1, 255, %0 + ld%M1.ub %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "zero_extend<mode>si2" + [(set (match_operand:SI 0 "arith_reg_dest") + (zero_extend:SI (match_operand:QIHI 1 "zero_extend_operand")))]) + +(define_insn_and_split "*zero_extend<mode>si2_compact" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extend:SI (match_operand:QIHI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "extu.<bw> %1,%0" + "&& can_create_pseudo_p ()" + [(set (match_dup 0) (match_dup 2))] +{ + /* Sometimes combine fails to combine a T bit or negated T bit store to a + reg with a following zero extension. In the split pass after combine, + try to figure out how the extended reg was set. If it originated from + the T bit we can replace the zero extension with a reg move, which will + be eliminated. Notice that this also helps the *cbranch_t splitter when + it tries to post-combine tests and conditional branches, as it does not + check for zero extensions. */ + operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn); + if (operands[2] == NULL_RTX) + FAIL; +} + [(set_attr "type" "arith")]) + +(define_insn "*zero_extendhisi2_media" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.uw %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))] +{ + rtx op1 = operands[1]; + + if (GET_CODE (op1) == TRUNCATE) + op1 = XEXP (op1, 0); + operands[2] + = simplify_gen_subreg (SImode, op1, GET_MODE (op1), + subreg_lowpart_offset (SImode, GET_MODE (op1))); +}) + +(define_insn "*zero_extendqisi2_media" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (zero_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + andi %1, 255, %0 + ld%M1.ub %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_insn "zero_extendqihi2" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "extu.b %1,%0" + [(set_attr "type" "arith")]) + +;; SH2A supports two zero extending load instructions: movu.b and movu.w. +;; They could also be used for simple memory addresses like @Rn by setting +;; the displacement value to zero. 
However, doing so too early results in +;; missed opportunities for other optimizations such as post-inc or index +;; addressing loads. +;; Although the 'zero_extend_movu_operand' predicate does not allow simple +;; register addresses (an address without a displacement, index, post-inc), +;; zero-displacement addresses might be generated during reload, wich are +;; simplified to simple register addresses in turn. Thus, we have to +;; provide the Sdd and Sra alternatives in the patterns. +(define_insn "*zero_extend<mode>si2_disp_mem" + [(set (match_operand:SI 0 "arith_reg_dest" "=r,r") + (zero_extend:SI + (match_operand:QIHI 1 "zero_extend_movu_operand" "Sdd,Sra")))] + "TARGET_SH2A" + "@ + movu.<bw> %1,%0 + movu.<bw> @(0,%t1),%0" + [(set_attr "type" "load") + (set_attr "length" "4")]) + +;; Convert the zero extending loads in sequences such as: +;; movu.b @(1,r5),r0 movu.w @(2,r5),r0 +;; mov.b r0,@(1,r4) mov.b r0,@(1,r4) +;; +;; back to sign extending loads like: +;; mov.b @(1,r5),r0 mov.w @(2,r5),r0 +;; mov.b r0,@(1,r4) mov.b r0,@(1,r4) +;; +;; if the extension type is irrelevant. The sign extending mov.{b|w} insn +;; is only 2 bytes in size if the displacement is {K04|K05}. +;; If the displacement is greater it doesn't matter, so we convert anyways. +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (zero_extend:SI (match_operand 1 "displacement_mem_operand" ""))) + (set (match_operand 2 "nonimmediate_operand" "") + (match_operand 3 "arith_reg_operand" ""))] + "TARGET_SH2A + && REGNO (operands[0]) == REGNO (operands[3]) + && peep2_reg_dead_p (2, operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[2])) + <= GET_MODE_SIZE (GET_MODE (operands[1]))" + [(set (match_dup 0) (sign_extend:SI (match_dup 1))) + (set (match_dup 2) (match_dup 3))]) + +;; Fold sequences such as +;; mov.b @r3,r7 +;; extu.b r7,r7 +;; into +;; movu.b @(0,r3),r7 +;; This does not reduce the code size but the number of instructions is +;; halved, which results in faster code. +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (sign_extend:SI (match_operand 1 "simple_mem_operand" ""))) + (set (match_operand:SI 2 "arith_reg_dest" "") + (zero_extend:SI (match_operand 3 "arith_reg_operand" "")))] + "TARGET_SH2A + && GET_MODE (operands[1]) == GET_MODE (operands[3]) + && (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode) + && REGNO (operands[0]) == REGNO (operands[3]) + && (REGNO (operands[2]) == REGNO (operands[0]) + || peep2_reg_dead_p (2, operands[0]))" + [(set (match_dup 2) (zero_extend:SI (match_dup 4)))] +{ + operands[4] + = replace_equiv_address (operands[1], + gen_rtx_PLUS (SImode, XEXP (operands[1], 0), + const0_rtx)); +}) + +;; ------------------------------------------------------------------------- +;; Sign extension instructions +;; ------------------------------------------------------------------------- + +;; ??? This should be a define expand. +;; ??? Or perhaps it should be dropped? + +;; convert_move generates good code for SH[1-4]. 
+(define_insn "extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,?f")))] + "TARGET_SHMEDIA" + "@ + add.l %1, r63, %0 + ld%M1.l %m1, %0 + fmov.sl %1, %0" + [(set_attr "type" "arith_media,load_media,fpconv_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "extend")))]) + +(define_insn "extendhidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.w %m1, %0" + [(set_attr "type" "*,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] +{ + if (GET_CODE (operands[1]) == TRUNCATE) + operands[1] = XEXP (operands[1], 0); +}) + +(define_insn "extendqidi2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.b %m1, %0" + [(set_attr "type" "*,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (sign_extend:DI (match_operand:QI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 56))) + (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] +{ + if (GET_CODE (operands[1]) == TRUNCATE) + operands[1] = XEXP (operands[1], 0); +}) + +(define_expand "extend<mode>si2" + [(set (match_operand:SI 0 "arith_reg_dest") + (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]) + +(define_insn "*extendhisi2_media" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.w %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))] +{ + rtx op1 = operands[1]; + if (GET_CODE (op1) == TRUNCATE) + op1 = XEXP (op1, 0); + operands[2] + = simplify_gen_subreg (SImode, op1, GET_MODE (op1), + subreg_lowpart_offset (SImode, GET_MODE (op1))); +}) + +(define_insn_and_split "*extend<mode>si2_compact_reg" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (sign_extend:SI (match_operand:QIHI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "exts.<bw> %1,%0" + "&& can_create_pseudo_p ()" + [(set (match_dup 0) (match_dup 2))] +{ + /* Sometimes combine fails to combine a T bit or negated T bit store to a + reg with a following sign extension. In the split pass after combine, + try to figure the extended reg was set. 
If it originated from the T + bit we can replace the sign extension with a reg move, which will be + eliminated. */ + operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn); + if (operands[2] == NULL_RTX) + FAIL; +} + [(set_attr "type" "arith")]) + +;; FIXME: Fold non-SH2A and SH2A alternatives with "enabled" attribute. +;; See movqi insns. +(define_insn "*extend<mode>si2_compact_mem_disp" + [(set (match_operand:SI 0 "arith_reg_dest" "=z,r") + (sign_extend:SI + (mem:QIHI + (plus:SI + (match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "const_int_operand" "<disp04>,N")))))] + "TARGET_SH1 && ! TARGET_SH2A + && sh_legitimate_index_p (<MODE>mode, operands[2], false, true)" + "@ + mov.<bw> @(%O2,%1),%0 + mov.<bw> @%1,%0" + [(set_attr "type" "load")]) + +(define_insn "*extend<mode>si2_compact_mem_disp" + [(set (match_operand:SI 0 "arith_reg_dest" "=z,r,r") + (sign_extend:SI + (mem:QIHI + (plus:SI + (match_operand:SI 1 "arith_reg_operand" "%r,r,r") + (match_operand:SI 2 "const_int_operand" "<disp04>,N,<disp12>")))))] + "TARGET_SH2A && sh_legitimate_index_p (<MODE>mode, operands[2], true, true)" + "@ + mov.<bw> @(%O2,%1),%0 + mov.<bw> @%1,%0 + mov.<bw> @(%O2,%1),%0" + [(set_attr "type" "load") + (set_attr "length" "2,2,4")]) + +;; The *_snd patterns will take care of other QImode/HImode addressing +;; modes than displacement addressing. They must be defined _after_ the +;; displacement addressing patterns. Otherwise the displacement addressing +;; patterns will not be picked. +(define_insn "*extend<mode>si2_compact_snd" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (sign_extend:SI + (match_operand:QIHI 1 "movsrc_no_disp_mem_operand" "Snd")))] + "TARGET_SH1" + "mov.<bw> %1,%0" + [(set_attr "type" "load")]) + +(define_insn "*extendqisi2_media" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))] + "TARGET_SHMEDIA" + "@ + # + ld%M1.b %m1, %0" + [(set_attr "type" "arith_media,load_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (sign_extend:SI (match_operand:QI 1 "extend_reg_operand" "")))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24))) + (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))] +{ + rtx op1 = operands[1]; + if (GET_CODE (op1) == TRUNCATE) + op1 = XEXP (op1, 0); + operands[2] + = simplify_gen_subreg (SImode, op1, GET_MODE (op1), + subreg_lowpart_offset (SImode, GET_MODE (op1))); +}) + +(define_expand "extendqihi2" + [(set (match_operand:HI 0 "arith_reg_dest" "") + (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "")))] + "" + "") + +(define_insn "*extendqihi2_compact_reg" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))] + "TARGET_SH1" + "exts.b %1,%0" + [(set_attr "type" "arith")]) + +;; It would seem useful to combine the truncXi patterns into the movXi +;; patterns, but unary operators are ignored when matching constraints, +;; so we need separate patterns. 
+(define_insn "truncdisi2" + [(set (match_operand:SI 0 "general_movdst_operand" "=r,m,m,f,r,f") + (truncate:SI (match_operand:DI 1 "register_operand" "r,r,f,r,f,f")))] + "TARGET_SHMEDIA" + "@ + add.l %1, r63, %0 + st%M0.l %m0, %1 + fst%M0.s %m0, %T1 + fmov.ls %1, %0 + fmov.sl %T1, %0 + fmov.s %T1, %0" + [(set_attr "type" "arith_media,store_media,fstore_media,fload_media, + fpconv_media,fmove_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "extend")))]) + +(define_insn "truncdihi2" + [(set (match_operand:HI 0 "general_movdst_operand" "=?r,m") + (truncate:HI (match_operand:DI 1 "register_operand" "r,r")))] + "TARGET_SHMEDIA" +{ + static const char* alt[] = + { + "shlli %1,48,%0" "\n" + " shlri %0,48,%0", + + "st%M0.w %m0, %1" + }; + return alt[which_alternative]; +} + [(set_attr "type" "arith_media,store_media") + (set_attr "length" "8,4") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "extend")))]) + +; N.B. This should agree with LOAD_EXTEND_OP and movqi. +; Because we use zero extension, we can't provide signed QImode compares +; using a simple compare or conditional branch insn. +(define_insn "truncdiqi2" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,m") + (truncate:QI (match_operand:DI 1 "register_operand" "r,r")))] + "TARGET_SHMEDIA" + "@ + andi %1, 255, %0 + st%M0.b %m0, %1" + [(set_attr "type" "arith_media,store") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "extend")))]) + +;; ------------------------------------------------------------------------- +;; Move instructions +;; ------------------------------------------------------------------------- + +;; define push and pop so it is easy for sh.c +;; We can't use push and pop on SHcompact because the stack must always +;; be 8-byte aligned. +(define_expand "push" + [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) + (match_operand:SI 0 "register_operand" "r,l,x"))] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "pop" + [(set (match_operand:SI 0 "register_operand" "=r,l,x") + (mem:SI (post_inc:SI (reg:SI SP_REG))))] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "push_e" + [(parallel [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) + (match_operand:SF 0 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_insn "push_fpul" + [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) (reg:SF FPUL_REG))] + "TARGET_SH2E && ! TARGET_SH5" + "sts.l fpul,@-r15" + [(set_attr "type" "fstore") + (set_attr "late_fp_use" "yes") + (set_attr "hit_stack" "yes")]) + +;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4, +;; so use that. +(define_expand "push_4" + [(parallel [(set (mem:DF (pre_dec:SI (reg:SI SP_REG))) + (match_operand:DF 0 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "pop_e" + [(parallel [(set (match_operand:SF 0 "" "") + (mem:SF (post_inc:SI (reg:SI SP_REG)))) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_insn "pop_fpul" + [(set (reg:SF FPUL_REG) (mem:SF (post_inc:SI (reg:SI SP_REG))))] + "TARGET_SH2E && ! 
TARGET_SH5" + "lds.l @r15+,fpul" + [(set_attr "type" "load") + (set_attr "hit_stack" "yes")]) + +(define_expand "pop_4" + [(parallel [(set (match_operand:DF 0 "" "") + (mem:DF (post_inc:SI (reg:SI SP_REG)))) + (use (reg:PSI FPSCR_REG)) + (clobber (scratch:SI))])] + "TARGET_SH1 && ! TARGET_SH5" + "") + +(define_expand "push_fpscr" + [(const_int 0)] + "TARGET_SH2E" +{ + rtx insn = emit_insn (gen_fpu_switch (gen_frame_mem (PSImode, + gen_rtx_PRE_DEC (Pmode, + stack_pointer_rtx)), + get_fpscr_rtx ())); + add_reg_note (insn, REG_INC, stack_pointer_rtx); + DONE; +}) + +(define_expand "pop_fpscr" + [(const_int 0)] + "TARGET_SH2E" +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_frame_mem (PSImode, + gen_rtx_POST_INC (Pmode, + stack_pointer_rtx)))); + add_reg_note (insn, REG_INC, stack_pointer_rtx); + DONE; +}) + +;; The clrt and sett patterns can happen as the result of optimization and +;; insn expansion. +;; Comparisons might get simplified to a move of zero or 1 into the T reg. +;; In this case they might not disappear completely, because the T reg is +;; a fixed hard reg. +;; When DImode operations that use the T reg as carry/borrow are split into +;; individual SImode operations, the T reg is usually cleared before the +;; first SImode insn. +(define_insn "clrt" + [(set (reg:SI T_REG) (const_int 0))] + "TARGET_SH1" + "clrt" + [(set_attr "type" "mt_group")]) + +(define_insn "sett" + [(set (reg:SI T_REG) (const_int 1))] + "TARGET_SH1" + "sett" + [(set_attr "type" "mt_group")]) + +;; Use the combine pass to transform sequences such as +;; mov r5,r0 +;; add #1,r0 +;; shll2 r0 +;; mov.l @(r0,r4),r0 +;; into +;; shll2 r5 +;; add r4,r5 +;; mov.l @(4,r5),r0 +;; +;; See also PR 39423. +;; Notice that these patterns have a T_REG clobber, because the shift +;; sequence that will be split out might clobber the T_REG. Ideally, the +;; clobber would be added conditionally, depending on the result of +;; sh_ashlsi_clobbers_t_reg_p. When splitting out the shifts we must go +;; through the ashlsi3 expander in order to get the right shift insn -- +;; a T_REG clobbering or non-clobbering shift sequence or dynamic shift. +;; FIXME: Combine never tries this kind of patterns for DImode. 
+(define_insn_and_split "*movsi_index_disp_load" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (match_operand:SI 1 "mem_index_disp_operand" "m")) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) (plus:SI (match_dup 5) (match_dup 3))) + (set (match_dup 0) (match_dup 7))] +{ + rtx mem = operands[1]; + rtx plus0_rtx = XEXP (mem, 0); + rtx plus1_rtx = XEXP (plus0_rtx, 0); + rtx mult_rtx = XEXP (plus1_rtx, 0); + + operands[1] = XEXP (mult_rtx, 0); + operands[2] = GEN_INT (exact_log2 (INTVAL (XEXP (mult_rtx, 1)))); + operands[3] = XEXP (plus1_rtx, 1); + operands[4] = XEXP (plus0_rtx, 1); + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = + replace_equiv_address (mem, + gen_rtx_PLUS (SImode, operands[6], operands[4])); + + emit_insn (gen_ashlsi3 (operands[5], operands[1], operands[2])); +}) + +(define_insn_and_split "*movhi_index_disp_load" + [(set (match_operand:SI 0 "arith_reg_dest") + (SZ_EXTEND:SI (match_operand:HI 1 "mem_index_disp_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx mem = operands[1]; + rtx plus0_rtx = XEXP (mem, 0); + rtx plus1_rtx = XEXP (plus0_rtx, 0); + rtx mult_rtx = XEXP (plus1_rtx, 0); + + rtx op_1 = XEXP (mult_rtx, 0); + rtx op_2 = GEN_INT (exact_log2 (INTVAL (XEXP (mult_rtx, 1)))); + rtx op_3 = XEXP (plus1_rtx, 1); + rtx op_4 = XEXP (plus0_rtx, 1); + rtx op_5 = gen_reg_rtx (SImode); + rtx op_6 = gen_reg_rtx (SImode); + rtx op_7 = replace_equiv_address (mem, gen_rtx_PLUS (SImode, op_6, op_4)); + + emit_insn (gen_ashlsi3 (op_5, op_1, op_2)); + emit_insn (gen_addsi3 (op_6, op_5, op_3)); + + if (<CODE> == SIGN_EXTEND) + { + emit_insn (gen_extendhisi2 (operands[0], op_7)); + DONE; + } + else if (<CODE> == ZERO_EXTEND) + { + /* On SH2A the movu.w insn can be used for zero extending loads. */ + if (TARGET_SH2A) + emit_insn (gen_zero_extendhisi2 (operands[0], op_7)); + else + { + emit_insn (gen_extendhisi2 (operands[0], op_7)); + emit_insn (gen_zero_extendhisi2 (operands[0], + gen_lowpart (HImode, operands[0]))); + } + DONE; + } + else + FAIL; +}) + +(define_insn_and_split "*mov<mode>_index_disp_store" + [(set (match_operand:HISI 0 "mem_index_disp_operand" "=m") + (match_operand:HISI 1 "arith_reg_operand" "r")) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 6) (plus:SI (match_dup 5) (match_dup 3))) + (set (match_dup 7) (match_dup 1))] +{ + rtx mem = operands[0]; + rtx plus0_rtx = XEXP (mem, 0); + rtx plus1_rtx = XEXP (plus0_rtx, 0); + rtx mult_rtx = XEXP (plus1_rtx, 0); + + operands[0] = XEXP (mult_rtx, 0); + operands[2] = GEN_INT (exact_log2 (INTVAL (XEXP (mult_rtx, 1)))); + operands[3] = XEXP (plus1_rtx, 1); + operands[4] = XEXP (plus0_rtx, 1); + operands[5] = gen_reg_rtx (SImode); + operands[6] = gen_reg_rtx (SImode); + operands[7] = + replace_equiv_address (mem, + gen_rtx_PLUS (SImode, operands[6], operands[4])); + + emit_insn (gen_ashlsi3 (operands[5], operands[0], operands[2])); +}) + +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (set (subreg:SI (mem:QI (plus:SI (reg:SI SP_REG) (const_int 12)) 0) 0) +;; (made from (set (subreg:SI (reg:QI ###) 0) ) into T. +(define_insn "movsi_i" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,r,r,m,<,<,x,l,x,l,r") + (match_operand:SI 1 "general_movsrc_operand" + "Q,r,I08,mr,x,l,r,x,l,r,r,>,>,i"))] + "TARGET_SH1 + && ! TARGET_SH2E + && ! 
TARGET_SH2A + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + sts %1,%0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + fake %1,%0" + [(set_attr "type" "pcload_si,move,movi8,load_si,mac_gp,prget,store,mac_mem, + pstore,gp_mac,prset,mem_mac,pload,pcload_si") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*")]) + +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2) +;; ??? This allows moves from macl to fpul to be recognized, but these moves +;; will require a reload. +;; ??? We can't include f/f because we need the proper FPSCR setting when +;; TARGET_FMOVD is in effect, and mode switching is done before reload. +(define_insn "movsi_ie" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y") + (match_operand:SI 1 "general_movsrc_operand" + "Q,r,I08,I20,I28,mr,x,l,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))] + "(TARGET_SH2E || TARGET_SH2A) + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + mov %1,%0 + movi20 %1,%0 + movi20s %1,%0 + mov.l %1,%0 + sts %1,%0 + sts %1,%0 + mov.l %1,%0 + sts.l %1,%0 + sts.l %1,%0 + lds %1,%0 + lds %1,%0 + lds.l %1,%0 + lds.l %1,%0 + lds.l %1,%0 + sts.l %1,%0 + fake %1,%0 + lds %1,%0 + sts %1,%0 + fsts fpul,%0 + flds %1,fpul + fmov %1,%0 + ! move optimized away" + [(set_attr "type" "pcload_si,move,movi8,move,move,load_si,mac_gp,prget,store, + mac_mem,pstore,gp_mac,prset,mem_mac,pload,load,fstore, + pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil") + (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*") + (set_attr_alternative "length" + [(const_int 2) + (const_int 2) + (const_int 2) + (const_int 4) + (const_int 4) + (if_then_else + (match_test "TARGET_SH2A") + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (if_then_else + (match_test "TARGET_SH2A") + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 0)])]) + +(define_insn "movsi_i_lowpart" + [(set (strict_low_part + (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,m,r")) + (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,r,i"))] + "TARGET_SH1 + && (register_operand (operands[0], SImode) + || register_operand (operands[1], SImode))" + "@ + mov.l %1,%0 + mov %1,%0 + mov %1,%0 + mov.l %1,%0 + sts %1,%0 + sts %1,%0 + mov.l %1,%0 + fake %1,%0" + [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,store,pcload")]) + +(define_insn_and_split "load_ra" + [(set (match_operand:SI 0 "general_movdst_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "")] UNSPEC_RA))] + "TARGET_SH1" + "#" + "&& ! currently_expanding_to_rtl" + [(set (match_dup 0) (match_dup 1))] +{ + if (TARGET_SHCOMPACT && crtl->saves_all_registers) + operands[1] = gen_frame_mem (SImode, return_address_pointer_rtx); +}) + +;; The '?'s in the following constraints may not reflect the time taken +;; to perform the move. They are there to discourage the use of floating- +;; point registers for storing integer values. 
+(define_insn "*movsi_media" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,m,f?,m,f?,r,f?,*b,r,b") + (match_operand:SI 1 "general_movsrc_operand" + "r,I16Css,nCpg,m,rZ,m,f?,rZ,f?,f?,r,*b,Csy"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], SImode) + || sh_register_operand (operands[1], SImode) + || GET_CODE (operands[1]) == TRUNCATE)" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1 + fld%M1.s %m1, %0 + fst%M0.s %m0, %1 + fmov.ls %N1, %0 + fmov.sl %1, %0 + fmov.s %1, %0 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media, + fload_media,fstore_media,fload_media,fpconv_media, + fmove_media,ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,8,4,4,4,4,4,4,4,4,4,12") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_insn "*movsi_media_nofpu" + [(set (match_operand:SI 0 "general_movdst_operand" + "=r,r,r,r,m,*b,r,*b") + (match_operand:SI 1 "general_movsrc_operand" + "r,I16Css,nCpg,m,rZ,r,*b,Csy"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], SImode) + || sh_register_operand (operands[1], SImode) + || GET_CODE (operands[1]) == TRUNCATE)" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media, + ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,8,4,4,4,4,12") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "movsi_const" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 16)] UNSPEC_EXTRACT_S16))) + (set (match_dup 0) + (ior:SI (ashift:SI (match_dup 0) (const_int 16)) + (const:SI (unspec:SI [(match_dup 1) + (const_int 0)] UNSPEC_EXTRACT_U16))))] + "TARGET_SHMEDIA && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" +{ + if (GET_CODE (operands[1]) == LABEL_REF + && GET_CODE (XEXP (operands[1], 0)) == CODE_LABEL) + LABEL_NUSES (XEXP (operands[1], 0)) += 2; + else if (GOTOFF_P (operands[1])) + { + rtx unspec = XEXP (operands[1], 0); + + if (! UNSPEC_GOTOFF_P (unspec)) + { + unspec = XEXP (unspec, 0); + if (! UNSPEC_GOTOFF_P (unspec)) + abort (); + } + if (GET_CODE (XVECEXP (unspec , 0, 0)) == LABEL_REF + && (GET_CODE (XEXP (XVECEXP (unspec, 0, 0), 0)) == CODE_LABEL)) + LABEL_NUSES (XEXP (XVECEXP (unspec, 0, 0), 0)) += 2; + } +}) + +(define_expand "movsi_const_16bit" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 0)] UNSPEC_EXTRACT_S16)))] + "TARGET_SHMEDIA && flag_pic && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + "") + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (match_operand:SI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + [(const_int 0)] +{ + rtx insn = emit_insn (gen_movsi_const (operands[0], operands[1])); + + set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1])); + + DONE; +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && ((CONST_INT_P (operands[1]) + && ! 
satisfies_constraint_I16 (operands[1])) + || GET_CODE (operands[1]) == CONST_DOUBLE)" + [(set (subreg:DI (match_dup 0) 0) (match_dup 1))]) + +(define_expand "movsi" + [(set (match_operand:SI 0 "general_movdst_operand" "") + (match_operand:SI 1 "general_movsrc_operand" ""))] + "" +{ + prepare_move_operands (operands, SImode); +}) + +(define_expand "ic_invalidate_line" + [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r") + (match_dup 1)] UNSPEC_ICACHE) + (clobber (scratch:SI))])] + "TARGET_HARD_SH4 || TARGET_SH5" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_ic_invalidate_line_media (operands[0])); + DONE; + } + else if (TARGET_SHCOMPACT) + { + operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC); + operands[1] = force_reg (Pmode, operands[1]); + emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1])); + DONE; + } + else if (TARGET_SH4A_ARCH || TARGET_SH4_300) + { + emit_insn (gen_ic_invalidate_line_sh4a (operands[0])); + DONE; + } + operands[0] = force_reg (Pmode, operands[0]); + operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008, + Pmode))); +}) + +;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing +;; 0xf0000008, we get the low-oder bits *1*00 (binary), which fits +;; the requirement *1*00 for associative address writes. The alignment of +;; %0 implies that its least significant bit is cleared, +;; thus we clear the V bit of a matching entry if there is one. +(define_insn "ic_invalidate_line_i" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_ICACHE) + (clobber (match_scratch:SI 2 "=&r"))] + "TARGET_HARD_SH4" +{ + return "ocbwb @%0" "\n" + " extu.w %0,%2" "\n" + " or %1,%2" "\n" + " mov.l %0,@%2"; +} + [(set_attr "length" "8") + (set_attr "type" "cwb")]) + +(define_insn "ic_invalidate_line_sh4a" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPEC_ICACHE)] + "TARGET_SH4A_ARCH || TARGET_SH4_300" +{ + return "ocbwb @%0" "\n" + " synco" "\n" + " icbi @%0"; +} + [(set_attr "length" "16") ;; FIXME: Why 16 and not 6? Looks like typo. + (set_attr "type" "cwb")]) + +;; ??? could make arg 0 an offsettable memory operand to allow to save +;; an add in the code that calculates the address. 
+(define_insn "ic_invalidate_line_media" + [(unspec_volatile [(match_operand 0 "any_register_operand" "r")] + UNSPEC_ICACHE)] + "TARGET_SHMEDIA" +{ + return "ocbwb %0,0" "\n" + " synco" "\n" + " icbi %0,0" "\n" + " synci"; +} + [(set_attr "length" "16") + (set_attr "type" "invalidate_line_media")]) + +(define_insn "ic_invalidate_line_compact" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "z") + (match_operand:SI 1 "register_operand" "r")] + UNSPEC_ICACHE) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "initialize_trampoline" + [(match_operand:SI 0 "" "") + (match_operand:SI 1 "" "") + (match_operand:SI 2 "" "")] + "TARGET_SHCOMPACT" +{ + rtx sfun, tramp; + + tramp = force_reg (Pmode, operands[0]); + sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline", + SFUNC_STATIC)); + emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]); + emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]); + + emit_insn (gen_initialize_trampoline_compact (tramp, sfun)); + DONE; +}) + +(define_insn "initialize_trampoline_compact" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "z") + (match_operand:SI 1 "register_operand" "r") + (reg:SI R2_REG) (reg:SI R3_REG)] + UNSPEC_INIT_TRAMP) + + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "movhi" + [(set (match_operand:HI 0 "general_movdst_operand" "") + (match_operand:HI 1 "general_movsrc_operand" ""))] + "" +{ + prepare_move_operands (operands, HImode); +}) + +(define_expand "movqi" + [(set (match_operand:QI 0 "general_operand" "") + (match_operand:QI 1 "general_operand" ""))] + "" +{ + prepare_move_operands (operands, QImode); +}) + +;; Specifying the displacement addressing load / store patterns separately +;; before the generic movqi / movhi pattern allows controlling the order +;; in which load / store insns are selected in a more fine grained way. +;; FIXME: The non-SH2A and SH2A variants should be combined by adding +;; "enabled" attribute as it is done in other targets. +(define_insn "*mov<mode>_store_mem_disp04" + [(set (mem:QIHI + (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r,r") + (match_operand:SI 1 "const_int_operand" "<disp04>,N"))) + (match_operand:QIHI 2 "arith_reg_operand" "z,r"))] + "TARGET_SH1 && sh_legitimate_index_p (<MODE>mode, operands[1], false, true)" + "@ + mov.<bw> %2,@(%O1,%0) + mov.<bw> %2,@%0" + [(set_attr "type" "store")]) + +(define_insn "*mov<mode>_store_mem_disp12" + [(set (mem:QIHI + (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "const_int_operand" "<disp12>"))) + (match_operand:QIHI 2 "arith_reg_operand" "r"))] + "TARGET_SH2A && sh_legitimate_index_p (<MODE>mode, operands[1], true, true)" + "mov.<bw> %2,@(%O1,%0)" + [(set_attr "type" "store") + (set_attr "length" "4")]) + +(define_insn "*mov<mode>_load_mem_disp04" + [(set (match_operand:QIHI 0 "arith_reg_dest" "=z,r") + (mem:QIHI + (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r") + (match_operand:SI 2 "const_int_operand" "<disp04>,N"))))] + "TARGET_SH1 && ! 
TARGET_SH2A + && sh_legitimate_index_p (<MODE>mode, operands[2], false, true)" + "@ + mov.<bw> @(%O2,%1),%0 + mov.<bw> @%1,%0" + [(set_attr "type" "load")]) + +(define_insn "*mov<mode>_load_mem_disp12" + [(set (match_operand:QIHI 0 "arith_reg_dest" "=z,r,r") + (mem:QIHI + (plus:SI + (match_operand:SI 1 "arith_reg_operand" "%r,r,r") + (match_operand:SI 2 "const_int_operand" "<disp04>,N,<disp12>"))))] + "TARGET_SH2A && sh_legitimate_index_p (<MODE>mode, operands[2], true, true)" + "@ + mov.<bw> @(%O2,%1),%0 + mov.<bw> @%1,%0 + mov.<bw> @(%O2,%1),%0" + [(set_attr "type" "load") + (set_attr "length" "2,2,4")]) + +;; The order of the constraint alternatives is important here. +;; Q/r has to come first, otherwise PC relative loads might wrongly get +;; placed into delay slots. Since there is no QImode PC relative load, the +;; Q constraint and general_movsrc_operand will reject it for QImode. +;; The Snd alternatives should come before Sdd in order to avoid a preference +;; of using r0 als the register operand for addressing modes other than +;; displacement addressing. +;; The Sdd alternatives allow only r0 as register operand, even though on +;; SH2A any register could be allowed by switching to a 32 bit insn. +;; Generally sticking to the r0 is preferrable, since it generates smaller +;; code. Obvious r0 reloads can then be eliminated with a peephole on SH2A. +(define_insn "*mov<mode>" + [(set (match_operand:QIHI 0 "general_movdst_operand" + "=r,r,r,Snd,r, Sdd,z, r,l") + (match_operand:QIHI 1 "general_movsrc_operand" + "Q,r,i,r, Snd,z, Sdd,l,r"))] + "TARGET_SH1 + && (arith_reg_operand (operands[0], <MODE>mode) + || arith_reg_operand (operands[1], <MODE>mode))" + "@ + mov.<bw> %1,%0 + mov %1,%0 + mov %1,%0 + mov.<bw> %1,%0 + mov.<bw> %1,%0 + mov.<bw> %1,%0 + mov.<bw> %1,%0 + sts %1,%0 + lds %1,%0" + [(set_attr "type" "pcload,move,movi8,store,load,store,load,prget,prset") + (set (attr "length") + (cond [(and (match_operand 0 "displacement_mem_operand") + (not (match_operand 0 "short_displacement_mem_operand"))) + (const_int 4) + (and (match_operand 1 "displacement_mem_operand") + (not (match_operand 1 "short_displacement_mem_operand"))) + (const_int 4)] + (const_int 2)))]) + +(define_insn "*movqi_media" + [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:QI 1 "general_movsrc_operand" "r,I16Css,m,rZ"))] + "TARGET_SHMEDIA + && (arith_reg_operand (operands[0], QImode) + || extend_reg_or_0_operand (operands[1], QImode))" + "@ + add.l %1, r63, %0 + movi %1, %0 + ld%M1.ub %m1, %0 + st%M0.b %m0, %N1" + [(set_attr "type" "arith_media,arith_media,load_media,store_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "reload_inqi" + [(set (match_operand:SI 2 "" "=&r") + (match_operand:QI 1 "inqhi_operand" "")) + (set (match_operand:QI 0 "arith_reg_operand" "=r") + (truncate:QI (match_dup 3)))] + "TARGET_SHMEDIA" +{ + rtx inner = XEXP (operands[1], 0); + int regno = REGNO (inner); + + regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1; + operands[1] = gen_rtx_REG (SImode, regno); + operands[3] = gen_rtx_REG (DImode, REGNO (operands[2])); +}) + +(define_insn "*movhi_media" + [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m") + (match_operand:HI 1 "general_movsrc_operand" "r,I16Css,n,m,rZ"))] + "TARGET_SHMEDIA + && (arith_reg_operand (operands[0], HImode) + || arith_reg_or_0_operand (operands[1], HImode))" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + 
ld%M1.w %m1, %0 + st%M0.w %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:HI 0 "register_operand" "") + (match_operand:HI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && ! satisfies_constraint_I16 (operands[1])" + [(set (subreg:DI (match_dup 0) 0) (match_dup 1))]) + +(define_expand "reload_inhi" + [(set (match_operand:SI 2 "" "=&r") + (match_operand:HI 1 "inqhi_operand" "")) + (set (match_operand:HI 0 "arith_reg_operand" "=r") + (truncate:HI (match_dup 3)))] + "TARGET_SHMEDIA" +{ + rtx inner = XEXP (operands[1], 0); + int regno = REGNO (inner); + + regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1; + operands[1] = gen_rtx_REG (SImode, regno); + operands[3] = gen_rtx_REG (DImode, REGNO (operands[2])); +}) + +;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c +;; compiled with -m2 -ml -O3 -funroll-loops +(define_insn "*movdi_i" + [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x") + (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I08,i,x,r"))] + "TARGET_SH1 + && (arith_reg_operand (operands[0], DImode) + || arith_reg_operand (operands[1], DImode))" +{ + return output_movedouble (insn, operands, DImode); +} + [(set_attr "length" "4") + (set_attr "type" "pcload,move,load,store,move,pcload,move,move")]) + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. +(define_split + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "TARGET_SH1 && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int regno; + + if ((MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + switch (GET_CODE (operands[0])) + { + case REG: + regno = REGNO (operands[0]); + break; + case SUBREG: + regno = subreg_regno (operands[0]); + break; + case MEM: + regno = -1; + break; + default: + gcc_unreachable (); + } + + if (regno == -1 + || ! refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DImode); + operands[3] = operand_subword (operands[1], 0, 0, DImode); + operands[4] = operand_subword (operands[0], 1, 0, DImode); + operands[5] = operand_subword (operands[1], 1, 0, DImode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DImode); + operands[3] = operand_subword (operands[1], 1, 0, DImode); + operands[4] = operand_subword (operands[0], 0, 0, DImode); + operands[5] = operand_subword (operands[1], 0, 0, DImode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}) + +;; The '?'s in the following constraints may not reflect the time taken +;; to perform the move. They are there to discourage the use of floating- +;; point registers for storing integer values. 
+(define_insn "*movdi_media" + [(set (match_operand:DI 0 "general_movdst_operand" + "=r,r,r,rl,m,f?,m,f?,r,f?,*b,r,*b") + (match_operand:DI 1 "general_movsrc_operand" + "r,I16Css,nCpgF,m,rlZ,m,f?,rZ,f?,f?,r,*b,Csy"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], DImode) + || sh_register_operand (operands[1], DImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1 + fld%M1.d %m1, %0 + fst%M0.d %m0, %1 + fmov.qd %N1, %0 + fmov.dq %1, %0 + fmov.d %1, %0 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media, + fload_media,fstore_media,fload_media,dfpconv_media, + fmove_media,ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,16,4,4,4,4,4,4,4,4,4,*")]) + +(define_insn "*movdi_media_nofpu" + [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,rl,m,*b,r,*b"); + (match_operand:DI 1 "general_movsrc_operand" "r,I16Css,nCpgF,m,rlZ,r,*b,Csy"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], DImode) + || sh_register_operand (operands[1], DImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1 + ptabs %1, %0 + gettr %1, %0 + pt %1, %0" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media, + ptabs_media,gettr_media,pt_media") + (set_attr "length" "4,4,16,4,4,4,4,*")]) + +(define_insn "*movdi_media_I16" + [(set (match_operand:DI 0 "ext_dest_operand" "=r") + (match_operand:DI 1 "const_int_operand" "I16"))] + "TARGET_SHMEDIA && reload_completed" + "movi %1, %0" + [(set_attr "type" "arith_media") + (set_attr "length" "4")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" + [(set (match_dup 0) (match_dup 1))] +{ + rtx insn; + + if (TARGET_SHMEDIA64) + insn = emit_insn (gen_movdi_const (operands[0], operands[1])); + else + insn = emit_insn (gen_movdi_const_32bit (operands[0], operands[1])); + + set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1])); + + DONE; +}) + +(define_expand "movdi_const" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 48)] UNSPEC_EXTRACT_S16))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 32)] UNSPEC_EXTRACT_U16)))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 16)] UNSPEC_EXTRACT_U16)))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 0)] UNSPEC_EXTRACT_U16))))] + "TARGET_SHMEDIA64 && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" +{ + sh_mark_label (operands[1], 4); +}) + +(define_expand "movdi_const_32bit" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 16)] UNSPEC_EXTRACT_S16))) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) + (const:DI (unspec:DI [(match_dup 1) + (const_int 0)] UNSPEC_EXTRACT_U16))))] + "TARGET_SHMEDIA32 && reload_completed + && MOVI_SHORI_BASE_OPERAND_P (operands[1])" +{ + sh_mark_label (operands[1], 2); +}) + +(define_expand "movdi_const_16bit" + [(set (match_operand:DI 0 "arith_reg_operand" "=r") + (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s") + (const_int 0)] 
UNSPEC_EXTRACT_S16)))] + "TARGET_SHMEDIA && flag_pic && reload_completed + && GET_CODE (operands[1]) == SYMBOL_REF" + "") + +(define_split + [(set (match_operand:DI 0 "ext_dest_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && CONST_INT_P (operands[1]) + && ! satisfies_constraint_I16 (operands[1])" + [(set (match_dup 0) (match_dup 2)) + (match_dup 1)] +{ + unsigned HOST_WIDE_INT val = INTVAL (operands[1]); + unsigned HOST_WIDE_INT low = val; + unsigned HOST_WIDE_INT high = val; + unsigned HOST_WIDE_INT sign; + unsigned HOST_WIDE_INT val2 = val ^ (val-1); + + /* Zero-extend the 16 least-significant bits. */ + low &= 0xffff; + + /* Arithmetic shift right the word by 16 bits. */ + high >>= 16; + if (GET_CODE (operands[0]) == SUBREG + && GET_MODE (SUBREG_REG (operands[0])) == SImode) + { + high &= 0xffff; + high ^= 0x8000; + high -= 0x8000; + } + else + { + sign = 1; + sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1); + high ^= sign; + high -= sign; + } + do + { + /* If we can't generate the constant with a two-insn movi / shori + sequence, try some other strategies. */ + if (! CONST_OK_FOR_I16 (high)) + { + /* Try constant load / left shift. We know VAL != 0. */ + val2 = val ^ (val-1); + if (val2 > 0x1ffff) + { + int trailing_zeroes = exact_log2 ((val2 >> 16) + 1) + 15; + + if (CONST_OK_FOR_I16 (val >> trailing_zeroes) + || (! CONST_OK_FOR_I16 (high >> 16) + && CONST_OK_FOR_I16 (val >> (trailing_zeroes + 16)))) + { + val2 = (HOST_WIDE_INT) val >> trailing_zeroes; + operands[1] = gen_ashldi3_media (operands[0], operands[0], + GEN_INT (trailing_zeroes)); + break; + } + } + /* Try constant load / right shift. */ + val2 = (val >> 15) + 1; + if (val2 == (val2 & -val2)) + { + int shift = 49 - exact_log2 (val2); + + val2 = trunc_int_for_mode (val << shift, DImode); + if (CONST_OK_FOR_I16 (val2)) + { + operands[1] = gen_lshrdi3_media (operands[0], operands[0], + GEN_INT (shift)); + break; + } + } + /* Try mperm.w . */ + val2 = val & 0xffff; + if ((val >> 16 & 0xffff) == val2 + && (val >> 32 & 0xffff) == val2 + && (val >> 48 & 0xffff) == val2) + { + val2 = (HOST_WIDE_INT) val >> 48; + operands[1] = gen_rtx_REG (V4HImode, true_regnum (operands[0])); + operands[1] = gen_mperm_w0 (operands[1], operands[1]); + break; + } + /* Try movi / mshflo.l */ + val2 = (HOST_WIDE_INT) val >> 32; + if (val2 == ((unsigned HOST_WIDE_INT) + trunc_int_for_mode (val, SImode))) + { + operands[1] = gen_mshflo_l_di (operands[0], operands[0], + operands[0]); + break; + } + /* Try movi / mshflo.l w/ r63. */ + val2 = val + ((HOST_WIDE_INT) -1 << 32); + if ((HOST_WIDE_INT) val2 < 0 && CONST_OK_FOR_I16 (val2)) + { + operands[1] = gen_mshflo_l_di (operands[0], operands[0], + const0_rtx); + break; + } + } + val2 = high; + operands[1] = gen_shori_media (operands[0], operands[0], GEN_INT (low)); + } + while (0); + operands[2] = GEN_INT (val2); +}) + +(define_split + [(set (match_operand:DI 0 "ext_dest_operand" "") + (match_operand:DI 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && GET_CODE (operands[1]) == CONST_DOUBLE" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 0) + (ior:DI (ashift:DI (match_dup 0) (const_int 16)) (match_dup 1)))] +{ + unsigned HOST_WIDE_INT low = CONST_DOUBLE_LOW (operands[1]); + unsigned HOST_WIDE_INT high = CONST_DOUBLE_HIGH (operands[1]); + unsigned HOST_WIDE_INT val = low; + unsigned HOST_WIDE_INT sign; + + /* Zero-extend the 16 least-significant bits. 
*/ + val &= 0xffff; + operands[1] = GEN_INT (val); + + /* Arithmetic shift right the double-word by 16 bits. */ + low >>= 16; + low |= (high & 0xffff) << (HOST_BITS_PER_WIDE_INT - 16); + high >>= 16; + sign = 1; + sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1); + high ^= sign; + high -= sign; + + /* This will only be true if high is a sign-extension of low, i.e., + it must be either 0 or (unsigned)-1, and be zero iff the + most-significant bit of low is set. */ + if (high + (low >> (HOST_BITS_PER_WIDE_INT - 1)) == 0) + operands[2] = GEN_INT (low); + else + operands[2] = immed_double_const (low, high, DImode); +}) + +(define_insn "shori_media" + [(set (match_operand:DI 0 "ext_dest_operand" "=r,r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0,0") + (const_int 16)) + (match_operand:DI 2 "immediate_operand" "K16Csu,nF")))] + "TARGET_SHMEDIA && (reload_completed || arith_reg_dest (operands[0], DImode))" + "@ + shori %u2, %0 + #" + [(set_attr "type" "arith_media,*")]) + +(define_insn "*shori_media_si" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 16)) + (match_operand:SI 2 "immediate_operand" "K16Csu")))] + "TARGET_SHMEDIA" + "shori %u2, %0") + +(define_expand "movdi" + [(set (match_operand:DI 0 "general_movdst_operand" "") + (match_operand:DI 1 "general_movsrc_operand" ""))] + "" +{ + prepare_move_operands (operands, DImode); +}) + +(define_insn "movdf_media" + [(set (match_operand:DF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m") + (match_operand:DF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], DFmode) + || sh_register_operand (operands[1], DFmode))" + "@ + fmov.d %1, %0 + fmov.qd %N1, %0 + fmov.dq %1, %0 + add %1, r63, %0 + # + fld%M1.d %m1, %0 + fst%M0.d %m0, %1 + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "fmove_media,fload_media,dfpconv_media,arith_media,*, + fload_media,fstore_media,load_media,store_media")]) + +(define_insn "movdf_media_nofpu" + [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:DF 1 "general_movsrc_operand" "r,F,m,rZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], DFmode) + || sh_register_operand (operands[1], DFmode))" + "@ + add %1, r63, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,*,load_media,store_media")]) + +(define_split + [(set (match_operand:DF 0 "arith_reg_dest" "") + (match_operand:DF 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 3) (match_dup 2))] +{ + int endian = WORDS_BIG_ENDIAN ? 1 : 0; + long values[2]; + REAL_VALUE_TYPE value; + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_DOUBLE (value, values); + + if (HOST_BITS_PER_WIDE_INT >= 64) + operands[2] = immed_double_const ((unsigned long) values[endian] + | ((HOST_WIDE_INT) values[1 - endian] + << 32), 0, DImode); + else + { + gcc_assert (HOST_BITS_PER_WIDE_INT == 32); + operands[2] = immed_double_const (values[endian], values[1 - endian], + DImode); + } + + operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0])); +}) + +;; FIXME: This should be a define_insn_and_split. +(define_insn "movdf_k" + [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))] + "TARGET_SH1 + && (! (TARGET_SH4 || TARGET_SH2A_DOUBLE) || reload_completed + /* ??? 
We provide some insn so that direct_{load,store}[DFmode] get set */ + || (REG_P (operands[0]) && REGNO (operands[0]) == 3) + || (REG_P (operands[1]) && REGNO (operands[1]) == 3)) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" +{ + return output_movedouble (insn, operands, DFmode); +} + [(set_attr "length" "4") + (set_attr "type" "move,pcload,load,store")]) + +;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD. +;; However, the d/F/c/z alternative cannot be split directly; it is converted +;; with special code in machine_dependent_reorg into a load of the R0_REG and +;; the d/m/c/X alternative, which is split later into single-precision +;; instructions. And when not optimizing, no splits are done before fixing +;; up pcloads, so we need usable length information for that. +(define_insn "movdf_i4" + [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d") + (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + { + switch (which_alternative) + { + case 0: + if (TARGET_FMOVD) + return "fmov %1,%0"; + else if (REGNO (operands[0]) != REGNO (operands[1]) + 1) + return "fmov %R1,%R0" "\n" + " fmov %S1,%S0"; + else + return "fmov %S1,%S0" "\n" + " fmov %R1,%R0"; + case 3: + case 4: + return "fmov.d %1,%0"; + default: + return "#"; + } + } + [(set_attr_alternative "length" + [(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8)) + (const_int 4) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (const_int 4) + (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn) + ;; We can't use 4-byte push/pop on SHcompact, so we have to + ;; increment or decrement r15 explicitly. + (if_then_else + (match_test "TARGET_SHCOMPACT") + (const_int 10) (const_int 8)) + (if_then_else + (match_test "TARGET_SHCOMPACT") + (const_int 10) (const_int 8))]) + (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload") + (set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*") + (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") + (const_string "double") + (const_string "none")))]) + +;; Moving DFmode between fp/general registers through memory +;; (the top of the stack) is faster than moving through fpul even for +;; little endian. Because the type of an instruction is important for its +;; scheduling, it is beneficial to split these operations, rather than +;; emitting them in one single chunk, even if this will expose a stack +;; use that will prevent scheduling of other stack accesses beyond this +;; instruction. 
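+;; As an illustrative sketch (not the exact code this split emits), moving a
+;; double from an FP register pair to a general register pair ends up as
+;; something along the lines of
+;;   fmov.d  dr0,@-r15     ! spill the double to the top of the stack
+;;   mov.l   @r15+,r1      ! then reload it word by word into the
+;;   mov.l   @r15+,r2      ! general register pair
+;; rather than two transfers through fpul.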
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 "=X"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed
+ && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)"
+ [(const_int 0)]
+{
+ rtx insn, tos;
+
+ if (TARGET_SH5 && true_regnum (operands[1]) < 16)
+ {
+ emit_move_insn (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx, -8));
+ tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx);
+ }
+ else
+ tos = gen_tmp_stack_mem (DFmode,
+ gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2]));
+ if (! (TARGET_SH5 && true_regnum (operands[1]) < 16))
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ if (TARGET_SH5 && true_regnum (operands[0]) < 16)
+ tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx);
+ else
+ tos = gen_tmp_stack_mem (DFmode,
+ gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2]));
+ if (TARGET_SH5 && true_regnum (operands[0]) < 16)
+ emit_move_insn (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx, 8));
+ else
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+})
+
+;; local-alloc sometimes allocates scratch registers even when not required,
+;; so we must be prepared to handle these.
+
+;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k.
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && reload_completed
+ && true_regnum (operands[0]) < 16
+ && true_regnum (operands[1]) < 16"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ /* If this was a reg <-> mem operation with base + index reg addressing,
+ we have to handle this in a special way.  */
+ rtx mem = operands[0];
+ int store_p = 1;
+ if (! memory_operand (mem, DFmode))
+ {
+ mem = operands[1];
+ store_p = 0;
+ }
+ if (GET_CODE (mem) == SUBREG && SUBREG_BYTE (mem) == 0)
+ mem = SUBREG_REG (mem);
+ if (MEM_P (mem))
+ {
+ rtx addr = XEXP (mem, 0);
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0))
+ && REG_P (XEXP (addr, 1)))
+ {
+ int offset;
+ rtx reg0 = gen_rtx_REG (Pmode, 0);
+ rtx regop = operands[store_p], word0, word1;
+
+ if (GET_CODE (regop) == SUBREG)
+ alter_subreg (&regop, true);
+ if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1)))
+ offset = 2;
+ else
+ offset = 4;
+ mem = copy_rtx (mem);
+ PUT_MODE (mem, SImode);
+ word0 = gen_rtx_SUBREG (SImode, regop, 0);
+ alter_subreg (&word0, true);
+ word1 = gen_rtx_SUBREG (SImode, regop, 4);
+ alter_subreg (&word1, true);
+ if (store_p || ! refers_to_regno_p (REGNO (word0),
+ REGNO (word0) + 1, addr, 0))
+ {
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word0)
+ : gen_movsi_ie (word0, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
+ mem = copy_rtx (mem);
+ emit_insn (store_p
+ ?
gen_movsi_ie (mem, word1) + : gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); + } + else + { + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset))); + emit_insn (gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); + mem = copy_rtx (mem); + emit_insn (gen_movsi_ie (word0, mem)); + } + DONE; + } + } +}) + +;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (reg:SI R0_REG))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "reload_indf__frn" + [(parallel [(set (match_operand:DF 0 "register_operand" "=a") + (match_operand:DF 1 "immediate_operand" "FQ")) + (use (reg:PSI FPSCR_REG)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "TARGET_SH1" + "") + +(define_expand "reload_outdf__RnFRm" + [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f") + (match_operand:DF 1 "register_operand" "af,r")) + (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])] + "TARGET_SH1" + "") + +;; Simplify no-op moves. +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "TARGET_SH2E && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 0) (match_dup 0))] + "") + +;; fmovd substitute post-reload splits +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[0])) + && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))" + [(const_int 0)] +{ + int dst = true_regnum (operands[0]), src = true_regnum (operands[1]); + emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst), + gen_rtx_REG (SFmode, src), operands[2])); + emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst + 1), + gen_rtx_REG (SFmode, src + 1), operands[2])); + DONE; +}) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (mem:DF (match_operand:SI 1 "register_operand" ""))) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! 
TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[0])) + && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))" + [(const_int 0)] +{ + int regno = true_regnum (operands[0]); + rtx insn; + rtx mem = SET_SRC (XVECEXP (PATTERN (curr_insn), 0, 0)); + rtx mem2 + = change_address (mem, SFmode, gen_rtx_POST_INC (Pmode, operands[1])); + insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, + regno + SH_REG_MSW_OFFSET), + mem2, operands[2])); + add_reg_note (insn, REG_INC, operands[1]); + insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, + regno + SH_REG_LSW_OFFSET), + change_address (mem, SFmode, NULL_RTX), + operands[2])); + DONE; +}) + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))" + [(const_int 0)] +{ + int regno = true_regnum (operands[0]); + rtx addr, insn; + rtx mem2 = change_address (operands[1], SFmode, NULL_RTX); + rtx reg0 = gen_rtx_REG (SFmode, regno + SH_REG_MSW_OFFSET); + rtx reg1 = gen_rtx_REG (SFmode, regno + SH_REG_LSW_OFFSET); + + operands[1] = copy_rtx (mem2); + addr = XEXP (mem2, 0); + + switch (GET_CODE (addr)) + { + case REG: + /* This is complicated. If the register is an arithmetic register + we can just fall through to the REG+DISP case below. Otherwise + we have to use a combination of POST_INC and REG addressing... */ + if (! arith_reg_operand (operands[1], SFmode)) + { + XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); + insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + + /* If we have modified the stack pointer, the value that we have + read with post-increment might be modified by an interrupt, + so write it back. */ + if (REGNO (XEXP (addr, 0)) == STACK_POINTER_REGNUM) + emit_insn (gen_push_e (reg0)); + else + emit_insn (gen_addsi3 (XEXP (operands[1], 0), XEXP (operands[1], 0), + GEN_INT (-4))); + break; + } + /* Fall through. */ + + case PLUS: + emit_insn (gen_movsf_ie (reg0, operands[1], operands[2])); + operands[1] = copy_rtx (operands[1]); + XEXP (operands[1], 0) = plus_constant (Pmode, addr, 4); + emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + break; + + case POST_INC: + insn = emit_insn (gen_movsf_ie (reg0, operands[1], operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + + default: + debug_rtx (addr); + gcc_unreachable (); + } + + DONE; +}) + +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed + && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))" + [(const_int 0)] +{ + int regno = true_regnum (operands[1]); + rtx insn, addr; + rtx reg0 = gen_rtx_REG (SFmode, regno + SH_REG_MSW_OFFSET); + rtx reg1 = gen_rtx_REG (SFmode, regno + SH_REG_LSW_OFFSET); + + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], SFmode); + addr = XEXP (operands[0], 0); + + switch (GET_CODE (addr)) + { + case REG: + /* This is complicated. 
If the register is an arithmetic register + we can just fall through to the REG+DISP case below. Otherwise + we have to use a combination of REG and PRE_DEC addressing... */ + if (! arith_reg_operand (operands[0], SFmode)) + { + emit_insn (gen_addsi3 (addr, addr, GEN_INT (4))); + emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + + operands[0] = copy_rtx (operands[0]); + XEXP (operands[0], 0) = addr = gen_rtx_PRE_DEC (SImode, addr); + + insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + } + /* Fall through. */ + + case PLUS: + /* Since REG+DISP addressing has already been decided upon by gcc + we can rely upon it having chosen an arithmetic register as the + register component of the address. Just emit the lower numbered + register first, to the lower address, then the higher numbered + register to the higher address. */ + emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + + operands[0] = copy_rtx (operands[0]); + XEXP (operands[0], 0) = plus_constant (Pmode, addr, 4); + + emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + break; + + case PRE_DEC: + /* This is easy. Output the word to go to the higher address + first (ie the word in the higher numbered register) then the + word to go to the lower address. */ + + insn = emit_insn (gen_movsf_ie (operands[0], reg1, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + + insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2])); + add_reg_note (insn, REG_INC, XEXP (addr, 0)); + break; + + default: + /* FAIL; */ + debug_rtx (addr); + gcc_unreachable (); + } + + DONE; +}) + +;; If the output is a register and the input is memory or a register, we have +;; to be careful and see which word needs to be loaded first. +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "TARGET_SH1 && reload_completed" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 4) (match_dup 5))] +{ + int regno; + + if ((MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC) + || (MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == POST_INC)) + FAIL; + + switch (GET_CODE (operands[0])) + { + case REG: + regno = REGNO (operands[0]); + break; + case SUBREG: + regno = subreg_regno (operands[0]); + break; + case MEM: + regno = -1; + break; + default: + gcc_unreachable (); + } + + if (regno == -1 + || ! 
refers_to_regno_p (regno, regno + 1, operands[1], 0)) + { + operands[2] = operand_subword (operands[0], 0, 0, DFmode); + operands[3] = operand_subword (operands[1], 0, 0, DFmode); + operands[4] = operand_subword (operands[0], 1, 0, DFmode); + operands[5] = operand_subword (operands[1], 1, 0, DFmode); + } + else + { + operands[2] = operand_subword (operands[0], 1, 0, DFmode); + operands[3] = operand_subword (operands[1], 1, 0, DFmode); + operands[4] = operand_subword (operands[0], 0, 0, DFmode); + operands[5] = operand_subword (operands[1], 0, 0, DFmode); + } + + if (operands[2] == 0 || operands[3] == 0 + || operands[4] == 0 || operands[5] == 0) + FAIL; +}) + +(define_expand "movdf" + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" ""))] + "" +{ + prepare_move_operands (operands, DFmode); + if (TARGET_SHMEDIA) + { + if (TARGET_SHMEDIA_FPU) + emit_insn (gen_movdf_media (operands[0], operands[1])); + else + emit_insn (gen_movdf_media_nofpu (operands[0], operands[1])); + DONE; + } + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } +}) + +;;This is incompatible with the way gcc uses subregs. +;;(define_insn "movv2sf_i" +;; [(set (match_operand:V2SF 0 "nonimmediate_operand" "=f,f,m") +;; (match_operand:V2SF 1 "nonimmediate_operand" "f,m,f"))] +;; "TARGET_SHMEDIA_FPU +;; && (fp_arith_reg_operand (operands[0], V2SFmode) +;; || fp_arith_reg_operand (operands[1], V2SFmode))" +;; "@ +;; # +;; fld%M1.p %m1, %0 +;; fst%M0.p %m0, %1" +;; [(set_attr "type" "*,fload_media,fstore_media")]) +(define_insn_and_split "movv2sf_i" + [(set (match_operand:V2SF 0 "general_movdst_operand" "=f,rf,r,m,mf") + (match_operand:V2SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))] + "TARGET_SHMEDIA_FPU" + "#" + "TARGET_SHMEDIA_FPU && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + operands[0] = simplify_gen_subreg (DFmode, operands[0], V2SFmode, 0); + operands[1] = simplify_gen_subreg (DFmode, operands[1], V2SFmode, 0); +}) + +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 "general_movdst_operand" "") + (match_operand:V2SF 1 "nonimmediate_operand" ""))] + "TARGET_SHMEDIA_FPU" +{ + prepare_move_operands (operands, V2SFmode); +}) + +(define_expand "addv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" +{ + sh_expand_binop_v2sf (PLUS, operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "subv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" +{ + sh_expand_binop_v2sf (MINUS, operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "mulv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" +{ + sh_expand_binop_v2sf (MULT, operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "divv2sf3" + [(match_operand:V2SF 0 "fp_arith_reg_operand" "") + (match_operand:V2SF 1 "fp_arith_reg_operand" "") + (match_operand:V2SF 2 "fp_arith_reg_operand" "")] + "TARGET_SHMEDIA_FPU" +{ + sh_expand_binop_v2sf (DIV, operands[0], operands[1], operands[2]); + DONE; +}) + +(define_insn_and_split "*movv4sf_i" + [(set (match_operand:V4SF 0 "general_movdst_operand" 
"=f,rf,r,m,mf") + (match_operand:V4SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(const_int 0)] +{ + for (int i = 0; i < 4/2; i++) + { + rtx x, y; + + if (MEM_P (operands[0])) + x = adjust_address (operands[0], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + x = simplify_gen_subreg (V2SFmode, operands[0], V4SFmode, i * 8); + + if (MEM_P (operands[1])) + y = adjust_address (operands[1], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + y = simplify_gen_subreg (V2SFmode, operands[1], V4SFmode, i * 8); + + emit_insn (gen_movv2sf_i (x, y)); + } + + DONE; +} + [(set_attr "length" "8")]) + +(define_expand "movv4sf" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "") + (match_operand:V4SF 1 "general_operand" ""))] + "TARGET_SHMEDIA_FPU" +{ + prepare_move_operands (operands, V4SFmode); +}) + +(define_insn_and_split "*movv16sf_i" + [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m") + (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(const_int 0)] +{ + for (int i = 0; i < 16/2; i++) + { + rtx x, y; + + if (MEM_P (operands[0])) + x = adjust_address (operands[0], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + { + x = gen_rtx_SUBREG (V2SFmode, operands[0], i * 8); + alter_subreg (&x, true); + } + + if (MEM_P (operands[1])) + y = adjust_address (operands[1], V2SFmode, + i * GET_MODE_SIZE (V2SFmode)); + else + { + y = gen_rtx_SUBREG (V2SFmode, operands[1], i * 8); + alter_subreg (&y, true); + } + + emit_insn (gen_movv2sf_i (x, y)); + } + + DONE; +} + [(set_attr "length" "32")]) + +(define_expand "movv16sf" + [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m") + (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))] + "TARGET_SHMEDIA_FPU" +{ + prepare_move_operands (operands, V16SFmode); +}) + +(define_insn "movsf_media" + [(set (match_operand:SF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m") + (match_operand:SF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))] + "TARGET_SHMEDIA_FPU + && (register_operand (operands[0], SFmode) + || sh_register_operand (operands[1], SFmode))" + "@ + fmov.s %1, %0 + fmov.ls %N1, %0 + fmov.sl %1, %0 + add.l %1, r63, %0 + # + fld%M1.s %m1, %0 + fst%M0.s %m0, %1 + ld%M1.l %m1, %0 + st%M0.l %m0, %N1" + [(set_attr "type" "fmove_media,fload_media,fpconv_media,arith_media,*,fload_media,fstore_media,load_media,store_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_insn "movsf_media_nofpu" + [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,m") + (match_operand:SF 1 "general_movsrc_operand" "r,F,m,rZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], SFmode) + || sh_register_operand (operands[1], SFmode))" + "@ + add.l %1, r63, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1" + [(set_attr "type" "arith_media,*,load_media,store_media") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_split + [(set (match_operand:SF 0 "arith_reg_dest" "") + (match_operand:SF 1 "immediate_operand" ""))] + "TARGET_SHMEDIA && reload_completed + && ! 
FP_REGISTER_P (true_regnum (operands[0]))" + [(set (match_dup 3) (match_dup 2))] +{ + long values; + REAL_VALUE_TYPE value; + + REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); + REAL_VALUE_TO_TARGET_SINGLE (value, values); + operands[2] = GEN_INT (values); + + operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0])); +}) + +(define_insn "movsf_i" + [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r") + (match_operand:SF 1 "general_movsrc_operand" "r,G,FQ,mr,r,r,l"))] + "TARGET_SH1 + && (! TARGET_SH2E + /* ??? We provide some insn so that direct_{load,store}[SFmode] get set */ + || (REG_P (operands[0]) && REGNO (operands[0]) == 3) + || (REG_P (operands[1]) && REGNO (operands[1]) == 3)) + && (arith_reg_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode))" + "@ + mov %1,%0 + mov #0,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + lds %1,%0 + sts %1,%0" + [(set_attr "type" "move,move,pcload,load,store,move,move")]) + +;; We may not split the ry/yr/XX alternatives to movsi_ie, since +;; update_flow_info would not know where to put REG_EQUAL notes +;; when the destination changes mode. +(define_insn "movsf_ie" + [(set (match_operand:SF 0 "general_movdst_operand" + "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,<,y,y") + (match_operand:SF 1 "general_movsrc_operand" + "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y,>,y")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,Bsc,Bsc,&z,X,X,X,X,X,X,X,X,y,X,X,X,X,X"))] + "TARGET_SH2E + && (arith_reg_operand (operands[0], SFmode) || fpul_operand (operands[0], SFmode) + || arith_reg_operand (operands[1], SFmode) || fpul_operand (operands[1], SFmode) + || arith_reg_operand (operands[3], SImode))" + "@ + fmov %1,%0 + mov %1,%0 + fldi0 %0 + fldi1 %0 + # + fmov.s %1,%0 + fmov.s %1,%0 + mov.l %1,%0 + mov.l %1,%0 + mov.l %1,%0 + fsts fpul,%0 + flds %1,fpul + lds.l %1,%0 + # + sts %1,%0 + lds %1,%0 + sts.l %1,%0 + lds.l %1,%0 + ! 
move optimized away" + [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load, + store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil") + (set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*") + (set_attr_alternative "length" + [(const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 4) + (if_then_else + (match_test "TARGET_SH2A") + (const_int 4) (const_int 2)) + (if_then_else + (match_test "TARGET_SH2A") + (const_int 4) (const_int 2)) + (const_int 2) + (if_then_else + (match_test "TARGET_SH2A") + (const_int 4) (const_int 2)) + (if_then_else + (match_test "TARGET_SH2A") + (const_int 4) (const_int 2)) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 4) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 2) + (const_int 0)]) + (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes") + (const_string "single") + (const_string "single")))]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (reg:SI FPUL_REG))] + "TARGET_SH1" + [(parallel [(set (reg:SF FPUL_REG) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))]) + (parallel [(set (match_dup 0) (reg:SF FPUL_REG)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "movsf" + [(set (match_operand:SF 0 "general_movdst_operand" "") + (match_operand:SF 1 "general_movsrc_operand" ""))] + "" +{ + prepare_move_operands (operands, SFmode); + if (TARGET_SHMEDIA) + { + if (TARGET_SHMEDIA_FPU) + emit_insn (gen_movsf_media (operands[0], operands[1])); + else + emit_insn (gen_movsf_media_nofpu (operands[0], operands[1])); + DONE; + } + if (TARGET_SH2E) + { + emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "mov_nop" + [(set (match_operand 0 "any_register_operand" "") (match_dup 0))] + "TARGET_SH2E" + "" + [(set_attr "length" "0") + (set_attr "type" "nil")]) + +(define_expand "reload_insf__frn" + [(parallel [(set (match_operand:SF 0 "register_operand" "=a") + (match_operand:SF 1 "immediate_operand" "FQ")) + (use (reg:PSI FPSCR_REG)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "TARGET_SH1" + "") + +(define_expand "reload_insi__i_fpul" + [(parallel [(set (match_operand:SI 0 "fpul_operand" "=y") + (match_operand:SI 1 "immediate_operand" "i")) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "TARGET_SH1" + "") + +(define_expand "ptabs" + [(set (match_operand 0 "" "=b") (match_operand 1 "" "r"))] + "TARGET_SHMEDIA" +{ + if (!TARGET_PT_FIXED) + { + rtx eq = operands[1]; + + /* ??? For canonical RTL we really should remove any CONST from EQ + before wrapping it in the AND, and finally wrap the EQ into a + const if is constant. However, for reload we must expose the + input register or symbolic constant, and we can't have + different insn structures outside of the operands for different + alternatives of the same pattern. */ + eq = gen_rtx_EQ (SImode, gen_rtx_AND (Pmode, eq, GEN_INT (3)), + GEN_INT (3)); + operands[1] + = (gen_rtx_IF_THEN_ELSE + (PDImode, + eq, + gen_rtx_MEM (PDImode, operands[1]), + gen_rtx_fmt_e (TARGET_SHMEDIA32 ? SIGN_EXTEND : TRUNCATE, + PDImode, operands[1]))); + } +}) + +;; expanded by ptabs expander. 
+(define_insn "*extendsipdi_media" + [(set (match_operand:PDI 0 "target_reg_operand" "=b,b"); + (if_then_else:PDI (eq (and:SI (match_operand:SI 1 "target_operand" + "r,Csy") + (const_int 3)) + (const_int 3)) + (mem:PDI (match_dup 1)) + (sign_extend:PDI (match_dup 1))))] + "TARGET_SHMEDIA && !TARGET_PT_FIXED" + "@ + ptabs %1, %0 + pt %1, %0" + [(set_attr "type" "ptabs_media,pt_media") + (set_attr "length" "4,*")]) + +(define_insn "*truncdipdi_media" + [(set (match_operand:PDI 0 "target_reg_operand" "=b,b"); + (if_then_else:PDI (eq (and:DI (match_operand:DI 1 "target_operand" + "r,Csy") + (const_int 3)) + (const_int 3)) + (mem:PDI (match_dup 1)) + (truncate:PDI (match_dup 1))))] + "TARGET_SHMEDIA && !TARGET_PT_FIXED" + "@ + ptabs %1, %0 + pt %1, %0" + [(set_attr "type" "ptabs_media,pt_media") + (set_attr "length" "4,*")]) + +(define_insn "*movsi_y" + [(set (match_operand:SI 0 "register_operand" "=y,y") + (match_operand:SI 1 "immediate_operand" "Qi,I08")) + (clobber (match_scratch:SI 2 "=&z,r"))] + "TARGET_SH2E + && (reload_in_progress || reload_completed)" + "#" + [(set_attr "length" "4") + (set_attr "type" "pcload,move")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 1 "immediate_operand" "")) + (clobber (match_operand:SI 2 "register_operand" ""))] + "TARGET_SH1" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +;; ------------------------------------------------------------------------ +;; Define the real conditional branch instructions. +;; ------------------------------------------------------------------------ + +(define_expand "branch_true" + [(set (pc) (if_then_else (ne (reg:SI T_REG) (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_SH1") + +(define_expand "branch_false" + [(set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_SH1") + +(define_insn_and_split "*cbranch_t" + [(set (pc) (if_then_else (match_operand 1 "cbranch_treg_value") + (label_ref (match_operand 0)) + (pc)))] + "TARGET_SH1" +{ + return output_branch (sh_eval_treg_value (operands[1]), insn, operands); +} + "&& 1" + [(const_int 0)] +{ + /* Try to canonicalize the branch condition if it is not one of: + (ne (reg:SI T_REG) (const_int 0)) + (eq (reg:SI T_REG) (const_int 0)) + + Instead of splitting out a new insn, we modify the current insn's + operands as needed. This preserves things such as REG_DEAD notes. */ + + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && REG_P (XEXP (operands[1], 0)) && REGNO (XEXP (operands[1], 0)) == T_REG + && XEXP (operands[1], 1) == const0_rtx) + DONE; + + int branch_cond = sh_eval_treg_value (operands[1]); + rtx new_cond_rtx = NULL_RTX; + + if (branch_cond == 0) + new_cond_rtx = gen_rtx_EQ (VOIDmode, get_t_reg_rtx (), const0_rtx); + else if (branch_cond == 1) + new_cond_rtx = gen_rtx_NE (VOIDmode, get_t_reg_rtx (), const0_rtx); + + if (new_cond_rtx != NULL_RTX) + validate_change (curr_insn, &XEXP (XEXP (PATTERN (curr_insn), 1), 0), + new_cond_rtx, false); + DONE; +} + [(set_attr "type" "cbranch")]) + +;; Patterns to prevent reorg from re-combining a condbranch with a branch +;; which destination is too far away. +;; The const_int_operand is distinct for each branch target; it avoids +;; unwanted matches with redundant_insn. 
+(define_insn "block_branch_redirect" + [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BBR))] + "TARGET_SH1" + "" + [(set_attr "length" "0")]) + +;; This one has the additional purpose to record a possible scratch register +;; for the following branch. +;; ??? Unfortunately, just setting the scratch register is not good enough, +;; because the insn then might be deemed dead and deleted. And we can't +;; make the use in the jump insn explicit because that would disable +;; delay slot scheduling from the target. +(define_insn "indirect_jump_scratch" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand 1 "const_int_operand" "")] UNSPEC_BBR)) + (set (pc) (unspec [(const_int 0)] UNSPEC_BBR))] + "TARGET_SH1" + "" + [(set_attr "length" "0")]) + +;; This one is used to preemt an insn from beyond the bra / braf / jmp +;; being pulled into the delay slot of a condbranch that has been made to +;; jump around the unconditional jump because it was out of range. +(define_insn "stuff_delay_slot" + [(set (pc) + (unspec [(match_operand:SI 0 "const_int_operand" "") (pc) + (match_operand:SI 1 "const_int_operand" "")] UNSPEC_BBR))] + "TARGET_SH1" + "" + [(set_attr "length" "0") + (set_attr "cond_delay_slot" "yes")]) + +;; Conditional branch insns + +(define_expand "cbranchint4_media" + [(set (pc) + (if_then_else (match_operator 0 "shmedia_cbranch_comparison_operator" + [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (match_operand 3 "" "") + (pc)))] + "TARGET_SHMEDIA" +{ + enum machine_mode mode = GET_MODE (operands[1]); + if (mode == VOIDmode) + mode = GET_MODE (operands[2]); + if (GET_CODE (operands[0]) == EQ || GET_CODE (operands[0]) == NE) + { + operands[1] = force_reg (mode, operands[1]); + if (CONSTANT_P (operands[2]) + && (! 
satisfies_constraint_I06 (operands[2]))) + operands[2] = force_reg (mode, operands[2]); + } + else + { + if (operands[1] != const0_rtx) + operands[1] = force_reg (mode, operands[1]); + if (operands[2] != const0_rtx) + operands[2] = force_reg (mode, operands[2]); + } + switch (GET_CODE (operands[0])) + { + case LEU: + case LE: + case LTU: + case LT: + operands[0] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[0])), + VOIDmode, operands[2], operands[1]); + operands[1] = XEXP (operands[0], 0); + operands[2] = XEXP (operands[0], 1); + break; + default: + operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), + VOIDmode, operands[1], operands[2]); + break; + } + operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]); +}) + +(define_expand "cbranchfp4_media" + [(set (pc) + (if_then_else (match_operator 0 "sh_float_comparison_operator" + [(match_operand 1 "" "") + (match_operand 2 "" "")]) + (match_operand 3 "" "") + (pc)))] + "TARGET_SHMEDIA" +{ + rtx tmp = gen_reg_rtx (SImode); + rtx cmp; + if (GET_CODE (operands[0]) == NE) + cmp = gen_rtx_EQ (SImode, operands[1], operands[2]); + else + cmp = gen_rtx_fmt_ee (GET_CODE (operands[0]), SImode, + operands[1], operands[2]); + + emit_insn (gen_cstore4_media (tmp, cmp, operands[1], operands[2])); + + if (GET_CODE (cmp) == GET_CODE (operands[0])) + operands[0] = gen_rtx_NE (VOIDmode, tmp, const0_rtx); + else + operands[0] = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); + operands[1] = tmp; + operands[2] = const0_rtx; + operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]); +}) + +(define_insn "*beq_media_i" + [(set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(match_operand:DI 1 "arith_reg_operand" "r,r") + (match_operand:DI 2 "arith_operand" "r,I06")]) + (match_operand 0 "target_operand" "b,b") + (pc)))] + "TARGET_SHMEDIA" + "@ + b%o3%' %1, %2, %0%> + b%o3i%' %1, %2, %0%>" + [(set_attr "type" "cbranch_media")]) + +(define_insn "*beq_media_i32" + [(set (pc) + (if_then_else (match_operator 3 "equality_comparison_operator" + [(match_operand:SI 1 "arith_reg_operand" "r,r") + (match_operand:SI 2 "arith_operand" "r,I06")]) + (match_operand 0 "target_operand" "b,b") + (pc)))] + "TARGET_SHMEDIA" + "@ + b%o3%' %1, %2, %0%> + b%o3i%' %1, %2, %0%>" + [(set_attr "type" "cbranch_media")]) + +(define_insn "*bgt_media_i" + [(set (pc) + (if_then_else (match_operator 3 "greater_comparison_operator" + [(match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N1, %N2, %0%>" + [(set_attr "type" "cbranch_media")]) + +(define_insn "*bgt_media_i32" + [(set (pc) + (if_then_else (match_operator 3 "greater_comparison_operator" + [(match_operand:SI 1 "arith_reg_or_0_operand" "rN") + (match_operand:SI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N1, %N2, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; These are only needed to make invert_jump() happy - otherwise, jump +;; optimization will be silently disabled. 
+(define_insn "*blt_media_i" + [(set (pc) + (if_then_else (match_operator 3 "less_comparison_operator" + [(match_operand:DI 1 "arith_reg_or_0_operand" "rN") + (match_operand:DI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N2, %N1, %0%>" + [(set_attr "type" "cbranch_media")]) + +(define_insn "*blt_media_i32" + [(set (pc) + (if_then_else (match_operator 3 "less_comparison_operator" + [(match_operand:SI 1 "arith_reg_or_0_operand" "rN") + (match_operand:SI 2 "arith_reg_or_0_operand" "rN")]) + (match_operand 0 "target_operand" "b") + (pc)))] + "TARGET_SHMEDIA" + "b%o3%' %N2, %N1, %0%>" + [(set_attr "type" "cbranch_media")]) + +;; combiner splitter for test-and-branch on single bit in register. This +;; is endian dependent because the non-paradoxical subreg looks different +;; on big endian. +(define_split + [(set (pc) + (if_then_else + (match_operator 3 "equality_comparison_operator" + [(subreg:SI + (zero_extract:DI + (subreg:DI (match_operand:SI 1 "extend_reg_operand" "") 0) + (const_int 1) + (match_operand 2 "const_int_operand" "")) 0) + (const_int 0)]) + (match_operand 0 "target_operand" "") + (pc))) + (clobber (match_operand:SI 4 "arith_reg_dest" ""))] + "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN" + [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 5))) + (set (pc) (if_then_else (match_dup 6) (match_dup 0) (pc)))] +{ + operands[5] = GEN_INT (31 - INTVAL (operands[2])); + operands[6] = (GET_CODE (operands[3]) == EQ + ? gen_rtx_GE (VOIDmode, operands[4], const0_rtx) + : gen_rtx_GT (VOIDmode, const0_rtx, operands[4])); +}) + +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) + (if_then_else (ne:SI (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) (const_int -1))) + (clobber (reg:SI T_REG))])] + "TARGET_SH2" +{ + if (GET_MODE (operands[0]) != SImode) + FAIL; + emit_jump_insn (gen_doloop_end_split (operands[0], operands[1], operands[0])); + DONE; +}) + +(define_insn_and_split "doloop_end_split" + [(set (pc) + (if_then_else (ne:SI (match_operand:SI 2 "arith_reg_dest" "0") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (match_dup 2) (const_int -1))) + (clobber (reg:SI T_REG))] + "TARGET_SH2" + "#" + "" + [(parallel [(set (reg:SI T_REG) + (eq:SI (match_dup 2) (const_int 1))) + (set (match_dup 0) (plus:SI (match_dup 2) (const_int -1)))]) + (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (label_ref (match_dup 1)) + (pc)))] + "" + [(set_attr "type" "cbranch")]) + +;; ------------------------------------------------------------------------ +;; Jump and linkage insns +;; ------------------------------------------------------------------------ + +(define_insn "jump_compact" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)" +{ + /* The length is 16 if the delay slot is unfilled. */ + if (get_attr_length(insn) > 4) + return output_far_jump(insn, operands[0]); + else + return "bra %l0%#"; +} + [(set_attr "type" "jump") + (set_attr "needs_delay_slot" "yes")]) + +;; ??? 
It would be much saner to explicitly use the scratch register +;; in the jump insn, and have indirect_jump_scratch only set it, +;; but fill_simple_delay_slots would refuse to do delay slot filling +;; from the target then, as it uses simplejump_p. +;;(define_insn "jump_compact_far" +;; [(set (pc) +;; (label_ref (match_operand 0 "" ""))) +;; (use (match_operand 1 "register_operand" "r")] +;; "TARGET_SH1" +;; "* return output_far_jump(insn, operands[0], operands[1]);" +;; [(set_attr "type" "jump") +;; (set_attr "needs_delay_slot" "yes")]) + +(define_insn "jump_media" + [(set (pc) + (match_operand 0 "target_operand" "b"))] + "TARGET_SHMEDIA" + "blink %0, r63%>" + [(set_attr "type" "jump_media")]) + +(define_expand "jump" + [(set (pc) + (label_ref (match_operand 0 "" "")))] + "" +{ + if (TARGET_SH1) + emit_jump_insn (gen_jump_compact (operands[0])); + else if (TARGET_SHMEDIA) + { + if (reload_in_progress || reload_completed) + FAIL; + emit_jump_insn (gen_jump_media (gen_rtx_LABEL_REF (Pmode, operands[0]))); + } + DONE; +}) + +(define_insn "force_mode_for_call" + [(use (reg:PSI FPSCR_REG))] + "TARGET_SHCOMPACT" + "" + [(set_attr "length" "0") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double")))]) + +(define_insn "calli" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1" +{ + if (TARGET_SH2A && (dbr_sequence_length () == 0)) + return "jsr/n @%0"; + else + return "jsr @%0%#"; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; This is TBR relative jump instruction for SH2A architecture. +;; Its use is enabled by assigning an attribute "function_vector" +;; and the vector number to a function during its declaration. +(define_insn "calli_tbr_rel" + [(call (mem (match_operand:SI 0 "symbol_ref_operand" "")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH2A && sh2a_is_function_vector_call (operands[0])" +{ + unsigned HOST_WIDE_INT vect_num; + vect_num = sh2a_get_function_vector_number (operands[0]); + operands[2] = GEN_INT (vect_num * 4); + + return "jsr/n @@(%O2,tbr)"; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "no") + (set_attr "fp_set" "unknown")]) + +;; This is a pc-rel call, using bsrf, for use with PIC. 
+(define_insn "calli_pcrel" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (use (match_operand 2 "" "")) + (clobber (reg:SI PR_REG))] + "TARGET_SH2" +{ + return "bsrf %0" "\n" + "%O2:%#"; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +(define_insn_and_split "call_pcrel" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (match_scratch:SI 2 "=r"))] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx lab = PATTERN (gen_call_site ()); + + if (SYMBOL_REF_LOCAL_P (operands[0])) + emit_insn (gen_sym_label2reg (operands[2], operands[0], lab)); + else + emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab)); + emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab))); + DONE; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +(define_insn "call_compact" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_compact_rettramp" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_media" + [(call (mem:DI (match_operand 0 "target_reg_operand" "b")) + (match_operand 1 "" "")) + (clobber (reg:DI PR_MEDIA_REG))] + "TARGET_SHMEDIA" + "blink %0, r18" + [(set_attr "type" "jump_media")]) + +(define_insn "call_valuei" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH1" +{ + if (TARGET_SH2A && (dbr_sequence_length () == 0)) + return "jsr/n @%1"; + else + return "jsr @%1%#"; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +;; This is TBR relative jump instruction for SH2A architecture. +;; Its use is enabled by assigning an attribute "function_vector" +;; and the vector number to a function during its declaration. 
+(define_insn "call_valuei_tbr_rel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SH2A && sh2a_is_function_vector_call (operands[1])" +{ + unsigned HOST_WIDE_INT vect_num; + vect_num = sh2a_get_function_vector_number (operands[1]); + operands[3] = GEN_INT (vect_num * 4); + + return "jsr/n @@(%O3,tbr)"; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "no") + (set_attr "fp_set" "unknown")]) + +(define_insn "call_valuei_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (use (match_operand 3 "" "")) + (clobber (reg:SI PR_REG))] + "TARGET_SH2" +{ + return "bsrf %1" "\n" + "%O3:%#"; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +(define_insn_and_split "call_value_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (match_scratch:SI 3 "=r"))] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx lab = PATTERN (gen_call_site ()); + + if (SYMBOL_REF_LOCAL_P (operands[1])) + emit_insn (gen_sym_label2reg (operands[3], operands[1], lab)); + else + emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab)); + emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3], + operands[2], copy_rtx (lab))); + DONE; +} + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes") + (set_attr "fp_set" "unknown")]) + +(define_insn "call_value_compact" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! 
(INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_value_compact_rettramp" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_value_media" + [(set (match_operand 0 "" "=rf") + (call (mem:DI (match_operand 1 "target_reg_operand" "b")) + (match_operand 2 "" ""))) + (clobber (reg:DI PR_MEDIA_REG))] + "TARGET_SHMEDIA" + "blink %1, r18" + [(set_attr "type" "jump_media")]) + +(define_expand "call" + [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (match_operand 2 "" "") + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))])] + "" +{ + if (TARGET_SHMEDIA) + { + operands[0] = shmedia_prepare_call_address (operands[0], 0); + emit_call_insn (gen_call_media (operands[0], operands[1])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[2] && INTVAL (operands[2])) + { + rtx cookie_rtx = operands[2]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[0], 0); + rtx r0, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[0] + = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT); + operands[0] = force_reg (SImode, operands[0]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_compact_rettramp (operands[0], operands[1], + operands[2])); + else + emit_call_insn (gen_call_compact (operands[0], operands[1], + operands[2])); + + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && ! 
SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[0], 0))); + XEXP (operands[0], 0) = reg; + } + if (!flag_pic && TARGET_SH2A + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF) + { + if (sh2a_is_function_vector_call (XEXP (operands[0], 0))) + { + emit_call_insn (gen_calli_tbr_rel (XEXP (operands[0], 0), + operands[1])); + DONE; + } + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF) + { + emit_call_insn (gen_call_pcrel (XEXP (operands[0], 0), operands[1])); + DONE; + } + else + { + operands[0] = force_reg (SImode, XEXP (operands[0], 0)); + operands[1] = operands[2]; + } + + emit_call_insn (gen_calli (operands[0], operands[1])); + DONE; +}) + +(define_insn "call_pop_compact" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 3 "immediate_operand" "n"))) + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_pop_compact_rettramp" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) + (match_operand 1 "" "")) + (match_operand 2 "immediate_operand" "n") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 3 "immediate_operand" "n"))) + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%0%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "call_pop" + [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (match_operand 2 "" "") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 3 "" "")))])] + "TARGET_SHCOMPACT" +{ + rtx cookie_rtx; + long cookie; + rtx func; + rtx r0, r1; + + gcc_assert (operands[2] && INTVAL (operands[2])); + cookie_rtx = operands[2]; + cookie = INTVAL (cookie_rtx); + func = XEXP (operands[0], 0); + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. 
*/ + emit_insn (gen_force_mode_for_call ()); + + operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline", + SFUNC_GOT); + operands[0] = force_reg (SImode, operands[0]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_pop_compact_rettramp + (operands[0], operands[1], operands[2], operands[3])); + else + emit_call_insn (gen_call_pop_compact + (operands[0], operands[1], operands[2], operands[3])); + + DONE; +}) + +(define_expand "call_value" + [(parallel [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (match_operand 3 "" "") + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))])] + "" +{ + if (TARGET_SHMEDIA) + { + operands[1] = shmedia_prepare_call_address (operands[1], 0); + emit_call_insn (gen_call_value_media (operands[0], operands[1], + operands[2])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3])) + { + rtx cookie_rtx = operands[3]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[1], 0); + rtx r0, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[1] + = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT); + operands[1] = force_reg (SImode, operands[1]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_value_compact_rettramp (operands[0], + operands[1], + operands[2], + operands[3])); + else + emit_call_insn (gen_call_value_compact (operands[0], operands[1], + operands[2], operands[3])); + + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && ! 
SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[1], 0))); + XEXP (operands[1], 0) = reg; + } + if (!flag_pic && TARGET_SH2A + && MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF) + { + if (sh2a_is_function_vector_call (XEXP (operands[1], 0))) + { + emit_call_insn (gen_call_valuei_tbr_rel (operands[0], + XEXP (operands[1], 0), operands[2])); + DONE; + } + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF) + { + emit_call_insn (gen_call_value_pcrel (operands[0], XEXP (operands[1], 0), + operands[2])); + DONE; + } + else + operands[1] = force_reg (SImode, XEXP (operands[1], 0)); + + emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "sibcalli" + [(call (mem:SI (match_operand:SI 0 "register_operand" "k")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH1" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +(define_insn "sibcalli_pcrel" + [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k")) + (match_operand 1 "" "")) + (use (match_operand 2 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH2" +{ + return "braf %0" "\n" + "%O2:%#"; +} + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +;; This uses an unspec to describe that the symbol_ref is very close. +(define_insn "sibcalli_thunk" + [(call (mem:SI (unspec:SI [(match_operand:SI 0 "symbol_ref_operand" "")] + UNSPEC_THUNK)) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH1" + "bra %O0" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump") + (set_attr "length" "2")]) + +(define_insn_and_split "sibcall_pcrel" + [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" "")) + (match_operand 1 "" "")) + (use (reg:PSI FPSCR_REG)) + (clobber (match_scratch:SI 2 "=k")) + (return)] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx lab = PATTERN (gen_call_site ()); + rtx call_insn; + + emit_insn (gen_sym_label2reg (operands[2], operands[0], lab)); + call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1], + copy_rtx (lab))); + SIBLING_CALL_P (call_insn) = 1; + DONE; +} + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +(define_insn "sibcall_compact" + [(call (mem:SI (match_operand:SI 0 "register_operand" "k,k")) + (match_operand 1 "" "")) + (return) + (use (match_operand:SI 2 "register_operand" "z,x")) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + ;; We want to make sure the `x' above will only match MACH_REG + ;; because sibcall_epilogue may clobber MACL_REG. 
+ (clobber (reg:SI MACL_REG))] + "TARGET_SHCOMPACT" +{ + static const char* alt[] = + { + "jmp @%0%#", + + "jmp @%0" "\n" + " sts %2,r0" + }; + return alt[which_alternative]; +} + [(set_attr "needs_delay_slot" "yes,no") + (set_attr "length" "2,4") + (set (attr "fp_mode") (const_string "single")) + (set_attr "type" "jump_ind")]) + +(define_insn "sibcall_media" + [(call (mem:DI (match_operand 0 "target_reg_operand" "k")) + (match_operand 1 "" "")) + (use (reg:SI PR_MEDIA_REG)) + (return)] + "TARGET_SHMEDIA" + "blink %0, r63" + [(set_attr "type" "jump_media")]) + +(define_expand "sibcall" + [(parallel + [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) + (match_operand 1 "" "")) + (match_operand 2 "" "") + (use (reg:PSI FPSCR_REG)) + (return)])] + "" +{ + if (TARGET_SHMEDIA) + { + operands[0] = shmedia_prepare_call_address (operands[0], 1); + emit_call_insn (gen_sibcall_media (operands[0], operands[1])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[2] + && (INTVAL (operands[2]) & ~ CALL_COOKIE_RET_TRAMP (1))) + { + rtx cookie_rtx = operands[2]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[0], 0); + rtx mach, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + /* FIXME: if we could tell whether all argument registers are + already taken, we could decide whether to force the use of + MACH_REG or to stick to R0_REG. Unfortunately, there's no + simple way to tell. We could use the CALL_COOKIE, but we + can't currently tell a register used for regular argument + passing from one that is unused. If we leave it up to reload + to decide which register to use, it seems to always choose + R0_REG, which leaves no available registers in SIBCALL_REGS + to hold the address of the trampoline. */ + mach = gen_rtx_REG (SImode, MACH_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[0] + = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT); + operands[0] = force_reg (SImode, operands[0]); + + /* We don't need a return trampoline, since the callee will + return directly to the upper caller. */ + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + { + cookie &= ~ CALL_COOKIE_RET_TRAMP (1); + cookie_rtx = GEN_INT (cookie); + } + + emit_move_insn (mach, func); + emit_move_insn (r1, cookie_rtx); + + emit_call_insn (gen_sibcall_compact (operands[0], operands[1], mach)); + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && ! SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, XEXP (operands[0], 0))); + XEXP (operands[0], 0) = reg; + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[0]) + && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + /* The PLT needs the PIC register, but the epilogue would have + to restore it, so we can only use PC-relative PIC calls for + static functions. 
*/ + && SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0))) + { + emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1])); + DONE; + } + else + operands[0] = force_reg (SImode, XEXP (operands[0], 0)); + + emit_call_insn (gen_sibcalli (operands[0], operands[1])); + DONE; +}) + +(define_insn "sibcall_valuei" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "register_operand" "k")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH1" + "jmp @%1%#" + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +(define_insn "sibcall_valuei_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k")) + (match_operand 2 "" ""))) + (use (match_operand 3 "" "")) + (use (reg:PSI FPSCR_REG)) + (return)] + "TARGET_SH2" +{ + return "braf %1" "\n" + "%O3:%#"; +} + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +(define_insn_and_split "sibcall_value_pcrel" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) + (match_operand 2 "" ""))) + (use (reg:PSI FPSCR_REG)) + (clobber (match_scratch:SI 3 "=k")) + (return)] + "TARGET_SH2" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx lab = PATTERN (gen_call_site ()); + rtx call_insn; + + emit_insn (gen_sym_label2reg (operands[3], operands[1], lab)); + call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0], + operands[3], + operands[2], + copy_rtx (lab))); + SIBLING_CALL_P (call_insn) = 1; + DONE; +} + [(set_attr "needs_delay_slot" "yes") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "type" "jump_ind")]) + +(define_insn "sibcall_value_compact" + [(set (match_operand 0 "" "=rf,rf") + (call (mem:SI (match_operand:SI 1 "register_operand" "k,k")) + (match_operand 2 "" ""))) + (return) + (use (match_operand:SI 3 "register_operand" "z,x")) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + ;; We want to make sure the `x' above will only match MACH_REG + ;; because sibcall_epilogue may clobber MACL_REG. 
+ (clobber (reg:SI MACL_REG))] + "TARGET_SHCOMPACT" +{ + static const char* alt[] = + { + "jmp @%1%#", + + "jmp @%1" "\n" + " sts %3,r0" + }; + return alt[which_alternative]; +} + [(set_attr "needs_delay_slot" "yes,no") + (set_attr "length" "2,4") + (set (attr "fp_mode") (const_string "single")) + (set_attr "type" "jump_ind")]) + +(define_insn "sibcall_value_media" + [(set (match_operand 0 "" "=rf") + (call (mem:DI (match_operand 1 "target_reg_operand" "k")) + (match_operand 2 "" ""))) + (use (reg:SI PR_MEDIA_REG)) + (return)] + "TARGET_SHMEDIA" + "blink %1, r63" + [(set_attr "type" "jump_media")]) + +(define_expand "sibcall_value" + [(parallel + [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (match_operand 3 "" "") + (use (reg:PSI FPSCR_REG)) + (return)])] + "" +{ + if (TARGET_SHMEDIA) + { + operands[1] = shmedia_prepare_call_address (operands[1], 1); + emit_call_insn (gen_sibcall_value_media (operands[0], operands[1], + operands[2])); + DONE; + } + else if (TARGET_SHCOMPACT && operands[3] + && (INTVAL (operands[3]) & ~ CALL_COOKIE_RET_TRAMP (1))) + { + rtx cookie_rtx = operands[3]; + long cookie = INTVAL (cookie_rtx); + rtx func = XEXP (operands[1], 0); + rtx mach, r1; + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + /* FIXME: if we could tell whether all argument registers are + already taken, we could decide whether to force the use of + MACH_REG or to stick to R0_REG. Unfortunately, there's no + simple way to tell. We could use the CALL_COOKIE, but we + can't currently tell a register used for regular argument + passing from one that is unused. If we leave it up to reload + to decide which register to use, it seems to always choose + R0_REG, which leaves no available registers in SIBCALL_REGS + to hold the address of the trampoline. */ + mach = gen_rtx_REG (SImode, MACH_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[1] + = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT); + operands[1] = force_reg (SImode, operands[1]); + + /* We don't need a return trampoline, since the callee will + return directly to the upper caller. */ + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + { + cookie &= ~ CALL_COOKIE_RET_TRAMP (1); + cookie_rtx = GEN_INT (cookie); + } + + emit_move_insn (mach, func); + emit_move_insn (r1, cookie_rtx); + + emit_call_insn (gen_sibcall_value_compact (operands[0], operands[1], + operands[2], mach)); + DONE; + } + else if (TARGET_SHCOMPACT && flag_pic + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0))) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOT2reg (reg, XEXP (operands[1], 0))); + XEXP (operands[1], 0) = reg; + } + if (flag_pic && TARGET_SH2 + && MEM_P (operands[1]) + && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + /* The PLT needs the PIC register, but the epilogue would have + to restore it, so we can only use PC-relative PIC calls for + static functions. 
*/ + && SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0))) + { + emit_call_insn (gen_sibcall_value_pcrel (operands[0], + XEXP (operands[1], 0), + operands[2])); + DONE; + } + else + operands[1] = force_reg (SImode, XEXP (operands[1], 0)); + + emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "call_value_pop_compact" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 4 "immediate_operand" "n"))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "call_value_pop_compact_rettramp" + [(set (match_operand 0 "" "=rf") + (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand 2 "" ""))) + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 4 "immediate_operand" "n"))) + (match_operand 3 "immediate_operand" "n") + (use (reg:SI R0_REG)) + (use (reg:SI R1_REG)) + (use (reg:PSI FPSCR_REG)) + (clobber (reg:SI R10_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))" + "jsr @%1%#" + [(set_attr "type" "call") + (set (attr "fp_mode") + (if_then_else (eq_attr "fpu_single" "yes") + (const_string "single") (const_string "double"))) + (set_attr "needs_delay_slot" "yes")]) + +(define_expand "call_value_pop" + [(parallel [(set (match_operand 0 "arith_reg_operand" "") + (call (mem:SI (match_operand 1 "arith_reg_operand" "")) + (match_operand 2 "" ""))) + (match_operand 3 "" "") + (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) + (match_operand 4 "" "")))])] + "TARGET_SHCOMPACT" +{ + rtx cookie_rtx; + long cookie; + rtx func; + rtx r0, r1; + + gcc_assert (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3])); + cookie_rtx = operands[3]; + cookie = INTVAL (cookie_rtx); + func = XEXP (operands[1], 0); + + if (flag_pic) + { + if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func)) + { + rtx reg = gen_reg_rtx (Pmode); + + emit_insn (gen_symGOTPLT2reg (reg, func)); + func = reg; + } + else + func = legitimize_pic_address (func, Pmode, 0); + } + + r0 = gen_rtx_REG (SImode, R0_REG); + r1 = gen_rtx_REG (SImode, R1_REG); + + /* Since such a call function may use all call-clobbered + registers, we force a mode switch earlier, so that we don't + run out of registers when adjusting fpscr for the call. */ + emit_insn (gen_force_mode_for_call ()); + + operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline", + SFUNC_GOT); + operands[1] = force_reg (SImode, operands[1]); + + emit_move_insn (r0, func); + emit_move_insn (r1, cookie_rtx); + + if (cookie & CALL_COOKIE_RET_TRAMP (1)) + emit_call_insn (gen_call_value_pop_compact_rettramp + (operands[0], operands[1], operands[2], + operands[3], operands[4])); + else + emit_call_insn (gen_call_value_pop_compact + (operands[0], operands[1], operands[2], + operands[3], operands[4])); + + DONE; +}) + +(define_expand "sibcall_epilogue" + [(return)] + "" +{ + sh_expand_epilogue (true); + if (TARGET_SHCOMPACT) + { + rtx insn, set; + + /* If epilogue clobbers r0, preserve it in macl. 
*/ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if ((set = single_set (insn)) + && REG_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) == R0_REG) + { + rtx r0 = gen_rtx_REG (SImode, R0_REG); + rtx tmp = gen_rtx_REG (SImode, MACL_REG); + + /* We can't tell at this point whether the sibcall is a + sibcall_compact and, if it is, whether it uses r0 or + mach as operand 2, so let the instructions that + preserve r0 be optimized away if r0 turns out to be + dead. */ + emit_insn_before (gen_rtx_SET (SImode, tmp, r0), insn); + emit_move_insn (r0, tmp); + break; + } + } + DONE; +}) + +(define_insn "indirect_jump_compact" + [(set (pc) + (match_operand:SI 0 "arith_reg_operand" "r"))] + "TARGET_SH1" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +(define_expand "indirect_jump" + [(set (pc) + (match_operand 0 "register_operand" ""))] + "" +{ + if (GET_MODE (operands[0]) != Pmode) + operands[0] = gen_rtx_SUBREG (Pmode, operands[0], 0); +}) + +;; The use of operand 1 / 2 helps us distinguish case table jumps +;; which can be present in structured code from indirect jumps which can not +;; be present in structured code. This allows -fprofile-arcs to work. + +;; For SH1 processors. +(define_insn "casesi_jump_1" + [(set (pc) + (match_operand:SI 0 "register_operand" "r")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_SH1" + "jmp @%0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +;; For all later processors. +(define_insn "casesi_jump_2" + [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r") + (label_ref (match_operand 1 "" "")))) + (use (label_ref (match_operand 2 "" "")))] + "TARGET_SH2 + && (! INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn)" + "braf %0%#" + [(set_attr "needs_delay_slot" "yes") + (set_attr "type" "jump_ind")]) + +(define_insn "casesi_jump_media" + [(set (pc) (match_operand 0 "target_reg_operand" "b")) + (use (label_ref (match_operand 1 "" "")))] + "TARGET_SHMEDIA" + "blink %0, r63" + [(set_attr "type" "jump_media")]) + +;; Call subroutine returning any type. +;; ??? This probably doesn't work. +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] + "(TARGET_SH2E || TARGET_SH2A) || TARGET_SHMEDIA" +{ + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); + + for (int i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; ------------------------------------------------------------------------ +;; Misc insns +;; ------------------------------------------------------------------------ + +(define_insn "dect" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SI 1 "arith_reg_dest" "0") (const_int 1))) + (set (match_operand:SI 0 "arith_reg_dest" "=r") + (plus:SI (match_dup 1) (const_int -1)))] + "TARGET_SH2" + "dt %0" + [(set_attr "type" "arith")]) + +(define_insn "nop" + [(const_int 0)] + "" + "nop") + +;; Load address of a label. This is only generated by the casesi expand, +;; and by machine_dependent_reorg (fixing up fp moves). +;; This must use unspec, because this only works for labels that are +;; within range. 
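+;; An added note (SH ISA detail assumed, not stated here): mova encodes only
+;; an unsigned 8-bit displacement scaled by 4 relative to the aligned PC, so
+;; it reaches labels at most about 1 KiB ahead of the insn; keeping the label
+;; inside an unspec prevents the rest of the compiler from treating the
+;; result as an ordinary label address that could drift out of range.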
+(define_insn "mova" + [(set (reg:SI R0_REG) + (unspec:SI [(label_ref (match_operand 0 "" ""))] UNSPEC_MOVA))] + "TARGET_SH1" + "mova %O0,r0" + [(set_attr "in_delay_slot" "no") + (set_attr "type" "arith")]) + +;; machine_dependent_reorg will make this a `mova'. +(define_insn "mova_const" + [(set (reg:SI R0_REG) + (unspec:SI [(match_operand 0 "immediate_operand" "i")] UNSPEC_MOVA))] + "TARGET_SH1" + "#" + [(set_attr "in_delay_slot" "no") + (set_attr "type" "arith")]) + +(define_expand "GOTaddr2picreg" + [(set (reg:SI R0_REG) + (unspec:SI [(const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC))] + UNSPEC_MOVA)) + (set (match_dup 0) (const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC))) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))] + "" +{ + if (TARGET_VXWORKS_RTP) + { + rtx gott_base = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); + rtx gott_index = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); + emit_insn (gen_vxworks_picreg (gott_base, gott_index)); + DONE; + } + + operands[0] = gen_rtx_REG (Pmode, PIC_REG); + operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME); + + if (TARGET_SHMEDIA) + { + rtx tr = gen_rtx_REG (Pmode, TR0_REG); + rtx pic = operands[0]; + rtx lab = PATTERN (gen_call_site ()); + rtx insn, equiv; + + equiv = operands[1]; + operands[1] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1], lab), + UNSPEC_PCREL_SYMOFF); + operands[1] = gen_rtx_CONST (Pmode, operands[1]); + + if (Pmode == SImode) + { + emit_insn (gen_movsi_const (pic, operands[1])); + emit_insn (gen_ptrel_si (tr, pic, copy_rtx (lab))); + } + else + { + emit_insn (gen_movdi_const (pic, operands[1])); + emit_insn (gen_ptrel_di (tr, pic, copy_rtx (lab))); + } + + insn = emit_move_insn (operands[0], tr); + + set_unique_reg_note (insn, REG_EQUAL, equiv); + + DONE; + } +}) + +;; A helper for GOTaddr2picreg to finish up the initialization of the +;; PIC register. +(define_expand "vxworks_picreg" + [(set (reg:SI PIC_REG) + (const:SI (unspec:SI [(match_operand:SI 0 "" "")] UNSPEC_PIC))) + (set (reg:SI R0_REG) + (const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PIC))) + (set (reg:SI PIC_REG) + (mem:SI (reg:SI PIC_REG))) + (set (reg:SI PIC_REG) + (mem:SI (plus:SI (reg:SI PIC_REG) + (reg:SI R0_REG))))] + "TARGET_VXWORKS_RTP") + +(define_insn "*ptb" + [(set (match_operand 0 "target_reg_operand" "=b") + (const (unspec [(match_operand 1 "" "Csy")] + UNSPEC_DATALABEL)))] + "TARGET_SHMEDIA && flag_pic + && satisfies_constraint_Csy (operands[1])" + "ptb/u datalabel %1, %0" + [(set_attr "type" "ptabs_media") + (set_attr "length" "*")]) + +(define_insn "ptrel_si" + [(set (match_operand:SI 0 "target_reg_operand" "=b") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (pc))) + (match_operand:SI 2 "" "")] + "TARGET_SHMEDIA" + "%O2: ptrel/u %1, %0" + [(set_attr "type" "ptabs_media")]) + +(define_insn "ptrel_di" + [(set (match_operand:DI 0 "target_reg_operand" "=b") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (pc))) + (match_operand:DI 2 "" "")] + "TARGET_SHMEDIA" + "%O2: ptrel/u %1, %0" + [(set_attr "type" "ptabs_media")]) + +(define_expand "builtin_setjmp_receiver" + [(match_operand 0 "" "")] + "flag_pic" +{ + emit_insn (gen_GOTaddr2picreg ()); + DONE; +}) + +(define_expand "call_site" + [(unspec [(match_dup 0)] UNSPEC_CALLER)] + "TARGET_SH1" +{ + static HOST_WIDE_INT i = 0; + operands[0] = GEN_INT (i); + i++; +}) + +;; op0 = op1 + r12 but hide it before reload completed. See the comment +;; in symGOT_load expand. 
+(define_insn_and_split "chk_guard_add" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (reg:SI PIC_REG)] + UNSPEC_CHKADD))] + "TARGET_SH1" + "#" + "TARGET_SH1 && reload_completed" + [(set (match_dup 0) (reg:SI PIC_REG)) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))] + "" + [(set_attr "type" "arith")]) + +(define_expand "sym_label2reg" + [(set (match_operand:SI 0 "" "") + (const:SI (unspec:SI [(match_operand:SI 1 "" "") + (const (plus:SI (match_operand:SI 2 "" "") + (const_int 2)))] + UNSPEC_SYMOFF)))] + "TARGET_SH1" "") + +(define_expand "symGOT_load" + [(set (match_dup 2) (match_operand 1 "" "")) + (set (match_dup 3) (plus (match_dup 2) (reg PIC_REG))) + (set (match_operand 0 "" "") (mem (match_dup 3)))] + "" +{ + rtx mem; + + operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); + + if (TARGET_SHMEDIA) + { + rtx reg = operands[2]; + + if (Pmode == DImode) + { + if (flag_pic > 1) + emit_insn (gen_movdi_const_32bit (reg, operands[1])); + else + emit_insn (gen_movdi_const_16bit (reg, operands[1])); + } + else + { + if (flag_pic > 1) + emit_insn (gen_movsi_const (reg, operands[1])); + else + emit_insn (gen_movsi_const_16bit (reg, operands[1])); + } + } + else + emit_move_insn (operands[2], operands[1]); + + /* When stack protector inserts codes after the result is set to + R0, @(rX, r12) will cause a spill failure for R0. Use a unspec + insn to avoid combining (set A (plus rX r12)) and (set op0 (mem A)) + when rX is a GOT address for the guard symbol. Ugly but doesn't + matter because this is a rare situation. */ + if (!TARGET_SHMEDIA + && flag_stack_protect + && GET_CODE (operands[1]) == CONST + && GET_CODE (XEXP (operands[1], 0)) == UNSPEC + && GET_CODE (XVECEXP (XEXP (operands[1], 0), 0, 0)) == SYMBOL_REF + && strcmp (XSTR (XVECEXP (XEXP (operands[1], 0), 0, 0), 0), + "__stack_chk_guard") == 0) + emit_insn (gen_chk_guard_add (operands[3], operands[2])); + else + emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2], + gen_rtx_REG (Pmode, PIC_REG))); + + /* N.B. This is not constant for a GOTPLT relocation. */ + mem = gen_rtx_MEM (Pmode, operands[3]); + MEM_NOTRAP_P (mem) = 1; + /* ??? Should we have a special alias set for the GOT? */ + emit_move_insn (operands[0], mem); + + DONE; +}) + +(define_expand "sym2GOT" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOT))] + "" + "") + +(define_expand "symGOT2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" +{ + rtx gotsym, insn; + + gotsym = gen_sym2GOT (operands[1]); + PUT_MODE (gotsym, Pmode); + insn = emit_insn (gen_symGOT_load (operands[0], gotsym)); + + MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1; + + DONE; +}) + +(define_expand "symGOTPLT2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" +{ + rtx pltsym = gen_rtx_CONST (Pmode, + gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, operands[1]), + UNSPEC_GOTPLT)); + emit_insn (gen_symGOT_load (operands[0], pltsym)); + DONE; +}) + +(define_expand "sym2GOTOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTOFF))] + "" + "") + +(define_expand "symGOTOFF2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" +{ + rtx gotoffsym, insn; + rtx t = (!can_create_pseudo_p () + ? 
operands[0] + : gen_reg_rtx (GET_MODE (operands[0]))); + + gotoffsym = gen_sym2GOTOFF (operands[1]); + PUT_MODE (gotoffsym, Pmode); + emit_move_insn (t, gotoffsym); + insn = emit_move_insn (operands[0], + gen_rtx_PLUS (Pmode, t, + gen_rtx_REG (Pmode, PIC_REG))); + + set_unique_reg_note (insn, REG_EQUAL, operands[1]); + + DONE; +}) + +(define_expand "symPLT_label2reg" + [(set (match_operand:SI 0 "" "") + (const:SI + (unspec:SI + [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PLT)) + (const:SI (plus:SI (match_operand:SI 2 "" "") + (const_int 2)))] UNSPEC_PCREL_SYMOFF))) + ;; Even though the PIC register is not really used by the call + ;; sequence in which this is expanded, the PLT code assumes the PIC + ;; register is set, so we must not skip its initialization. Since + ;; we only use this expand as part of calling sequences, and never + ;; to take the address of a function, this is the best point to + ;; insert the (use). Using the PLT to take the address of a + ;; function would be wrong, not only because the PLT entry could + ;; then be called from a function that doesn't initialize the PIC + ;; register to the proper GOT, but also because pointers to the + ;; same function might not compare equal, should they be set by + ;; different shared libraries. + (use (reg:SI PIC_REG))] + "TARGET_SH1" + "") + +(define_expand "sym2PIC" + [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PIC))] + "" + "") + +;; ------------------------------------------------------------------------- +;; TLS code generation. + +;; FIXME: The multi-insn asm blocks should be converted to use +;; define_insn_and_split. +;; See the thread [PATCH/RFA] SH TLS support on gcc-patches +;; <http://gcc.gnu.org/ml/gcc-patches/2003-02/msg01898.html> +;; for details. + +(define_insn "tls_global_dynamic" + [(set (match_operand:SI 0 "register_operand" "=&z") + (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_TLSGD)) + (const_int 0))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (scratch:SI))] + "TARGET_SH1" +{ + return "mov.l 1f,r4" "\n" + " mova 2f,r0" "\n" + " mov.l 2f,r1" "\n" + " add r0,r1" "\n" + " jsr @r1" "\n" + " add r12,r4" "\n" + " bra 3f" "\n" + " nop" "\n" + " .align 2" "\n" + "1: .long %a1@TLSGD" "\n" + "2: .long __tls_get_addr@PLT" "\n" + "3:"; +} + [(set_attr "type" "tls_load") + (set_attr "length" "26")]) + +(define_insn "tls_local_dynamic" + [(set (match_operand:SI 0 "register_operand" "=&z") + (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_TLSLDM)) + (const_int 0))) + (use (reg:PSI FPSCR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI PR_REG)) + (clobber (scratch:SI))] + "TARGET_SH1" +{ + return "mov.l 1f,r4" "\n" + " mova 2f,r0" "\n" + " mov.l 2f,r1" "\n" + " add r0,r1" "\n" + " jsr @r1" "\n" + " add r12,r4" "\n" + " bra 3f" "\n" + " nop" "\n" + " .align 2" "\n" + "1: .long %a1@TLSLDM" "\n" + "2: .long __tls_get_addr@PLT" "\n" + "3:"; +} + [(set_attr "type" "tls_load") + (set_attr "length" "26")]) + +(define_expand "sym2DTPOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_DTPOFF))] + "" + "") + +(define_expand "symDTPOFF2reg" + [(match_operand 0 "" "") (match_operand 1 "" "") (match_operand 2 "" "")] + "" +{ + rtx dtpoffsym; + rtx t = (!can_create_pseudo_p () + ? 
operands[0] + : gen_reg_rtx (GET_MODE (operands[0]))); + + dtpoffsym = gen_sym2DTPOFF (operands[1]); + PUT_MODE (dtpoffsym, Pmode); + emit_move_insn (t, dtpoffsym); + emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, operands[2])); + DONE; +}) + +(define_expand "sym2GOTTPOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTTPOFF))] + "" + "") + +(define_insn "tls_initial_exec" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "" "")] + UNSPEC_TLSIE)) + (use (reg:SI GBR_REG)) + (use (reg:SI PIC_REG)) + (clobber (reg:SI R0_REG))] + "" +{ + return "mov.l 1f,r0" "\n" + " stc gbr,%0" "\n" + " mov.l @(r0,r12),r0" "\n" + " bra 2f" "\n" + " add r0,%0" "\n" + " .align 2" "\n" + "1: .long %a1" "\n" + "2:"; +} + [(set_attr "type" "tls_load") + (set_attr "length" "16")]) + +(define_expand "sym2TPOFF" + [(const (unspec [(match_operand 0 "" "")] UNSPEC_TPOFF))] + "" + "") + +(define_expand "symTPOFF2reg" + [(match_operand 0 "" "") (match_operand 1 "" "")] + "" +{ + rtx tpoffsym; + + tpoffsym = gen_sym2TPOFF (operands[1]); + PUT_MODE (tpoffsym, Pmode); + emit_move_insn (operands[0], tpoffsym); + DONE; +}) + +;;------------------------------------------------------------------------------ +;; Thread pointer getter and setter. +;; +;; On SH the thread pointer is kept in the GBR. +;; These patterns are usually expanded from the respective built-in functions. +(define_expand "get_thread_pointersi" + [(set (match_operand:SI 0 "register_operand") (reg:SI GBR_REG))] + "TARGET_SH1") + +;; The store_gbr insn can also be used on !TARGET_SH1 for doing TLS accesses. +(define_insn "store_gbr" + [(set (match_operand:SI 0 "register_operand" "=r") (reg:SI GBR_REG))] + "" + "stc gbr,%0" + [(set_attr "type" "tls_load")]) + +(define_expand "set_thread_pointersi" + [(set (reg:SI GBR_REG) + (unspec_volatile:SI [(match_operand:SI 0 "register_operand")] + UNSPECV_GBR))] + "TARGET_SH1") + +(define_insn "load_gbr" + [(set (reg:SI GBR_REG) + (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_GBR))] + "TARGET_SH1" + "ldc %0,gbr" + [(set_attr "type" "move")]) + +;;------------------------------------------------------------------------------ +;; Thread pointer relative memory loads and stores. +;; +;; On SH there are GBR displacement address modes which can be utilized to +;; access memory behind the thread pointer. +;; Since we do not allow using GBR for general purpose memory accesses, these +;; GBR addressing modes are formed by the combine pass. +;; This could be done with fewer patterns than below by using a mem predicate +;; for the GBR mem, but then reload would try to reload addresses with a +;; zero displacement for some strange reason. 
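+;; A usage sketch (illustrative, not part of the original file): code that
+;; indexes off the thread pointer by a small constant, e.g.
+;;   struct tcb { int a; int b; };
+;;   int get_b (void)
+;;   {
+;;     struct tcb *p = (struct tcb *) __builtin_thread_pointer ();
+;;     return p->b;
+;;   }
+;; can be combined into a single mov.l @(4,gbr),r0 by the patterns below,
+;; assuming the offset fits the gbr_displacement predicate.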
+ +(define_insn "*mov<mode>_gbr_load" + [(set (match_operand:QIHISI 0 "register_operand" "=z") + (mem:QIHISI (plus:SI (reg:SI GBR_REG) + (match_operand:QIHISI 1 "gbr_displacement"))))] + "TARGET_SH1" + "mov.<bwl> @(%O1,gbr),%0" + [(set_attr "type" "load")]) + +(define_insn "*mov<mode>_gbr_load" + [(set (match_operand:QIHISI 0 "register_operand" "=z") + (mem:QIHISI (reg:SI GBR_REG)))] + "TARGET_SH1" + "mov.<bwl> @(0,gbr),%0" + [(set_attr "type" "load")]) + +(define_insn "*mov<mode>_gbr_load" + [(set (match_operand:SI 0 "register_operand" "=z") + (sign_extend:SI + (mem:QIHI (plus:SI (reg:SI GBR_REG) + (match_operand:QIHI 1 "gbr_displacement")))))] + "TARGET_SH1" + "mov.<bw> @(%O1,gbr),%0" + [(set_attr "type" "load")]) + +(define_insn "*mov<mode>_gbr_load" + [(set (match_operand:SI 0 "register_operand" "=z") + (sign_extend:SI (mem:QIHI (reg:SI GBR_REG))))] + "TARGET_SH1" + "mov.<bw> @(0,gbr),%0" + [(set_attr "type" "load")]) + +(define_insn "*mov<mode>_gbr_store" + [(set (mem:QIHISI (plus:SI (reg:SI GBR_REG) + (match_operand:QIHISI 0 "gbr_displacement"))) + (match_operand:QIHISI 1 "register_operand" "z"))] + "TARGET_SH1" + "mov.<bwl> %1,@(%O0,gbr)" + [(set_attr "type" "store")]) + +(define_insn "*mov<mode>_gbr_store" + [(set (mem:QIHISI (reg:SI GBR_REG)) + (match_operand:QIHISI 0 "register_operand" "z"))] + "TARGET_SH1" + "mov.<bwl> %0,@(0,gbr)" + [(set_attr "type" "store")]) + +;; DImode memory accesses have to be split in two SImode accesses. +;; Split them before reload, so that it gets a better chance to figure out +;; how to deal with the R0 restriction for the individual SImode accesses. +;; Do not match this insn during or after reload because it can't be split +;; afterwards. +(define_insn_and_split "*movdi_gbr_load" + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gbr_address_mem"))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 4) (match_dup 6))] +{ + /* Swap low/high part load order on little endian, so that the result reg + of the second load can be used better. */ + int off = TARGET_LITTLE_ENDIAN ? 1 : 0; + operands[3 + off] = gen_lowpart (SImode, operands[0]); + operands[5 + off] = gen_lowpart (SImode, operands[1]); + operands[4 - off] = gen_highpart (SImode, operands[0]); + operands[6 - off] = gen_highpart (SImode, operands[1]); +}) + +(define_insn_and_split "*movdi_gbr_store" + [(set (match_operand:DI 0 "gbr_address_mem") + (match_operand:DI 1 "register_operand"))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 4) (match_dup 6))] +{ + /* Swap low/high part store order on big endian, so that stores of function + call results can save a reg copy. */ + int off = TARGET_LITTLE_ENDIAN ? 0 : 1; + operands[3 + off] = gen_lowpart (SImode, operands[0]); + operands[5 + off] = gen_lowpart (SImode, operands[1]); + operands[4 - off] = gen_highpart (SImode, operands[0]); + operands[6 - off] = gen_highpart (SImode, operands[1]); +}) + +;; Sometimes memory accesses do not get combined with the store_gbr insn, +;; in particular when the displacements are in the range of the regular move +;; insns. Thus, in the first split pass after the combine pass we search +;; for missed opportunities and try to fix them up ourselves. +;; If an equivalent GBR address can be determined the load / store is split +;; into one of the GBR load / store patterns. 
+;; All of that must happen before reload (GBR address modes use R0 as the +;; other operand) and there's no point of doing it if the GBR is not +;; referenced in a function at all. +(define_split + [(set (match_operand:QIHISIDI 0 "register_operand") + (match_operand:QIHISIDI 1 "memory_operand"))] + "TARGET_SH1 && !reload_in_progress && !reload_completed + && df_regs_ever_live_p (GBR_REG)" + [(set (match_dup 0) (match_dup 1))] +{ + rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]); + if (gbr_mem != NULL_RTX) + operands[1] = replace_equiv_address (operands[1], gbr_mem); + else + FAIL; +}) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (sign_extend:SI (match_operand:QIHI 1 "memory_operand")))] + "TARGET_SH1 && !reload_in_progress && !reload_completed + && df_regs_ever_live_p (GBR_REG)" + [(set (match_dup 0) (sign_extend:SI (match_dup 1)))] +{ + rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]); + if (gbr_mem != NULL_RTX) + operands[1] = replace_equiv_address (operands[1], gbr_mem); + else + FAIL; +}) + +;; On SH2A we've got movu.b and movu.w for doing zero-extending mem loads. +;; Split those so that a GBR load can be used. +(define_split + [(set (match_operand:SI 0 "register_operand") + (zero_extend:SI (match_operand:QIHI 1 "memory_operand")))] + "TARGET_SH2A && !reload_in_progress && !reload_completed + && df_regs_ever_live_p (GBR_REG)" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (zero_extend:SI (match_dup 2)))] +{ + rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]); + if (gbr_mem != NULL_RTX) + { + operands[2] = gen_reg_rtx (GET_MODE (operands[1])); + operands[1] = replace_equiv_address (operands[1], gbr_mem); + } + else + FAIL; +}) + +(define_split + [(set (match_operand:QIHISIDI 0 "memory_operand") + (match_operand:QIHISIDI 1 "register_operand"))] + "TARGET_SH1 && !reload_in_progress && !reload_completed + && df_regs_ever_live_p (GBR_REG)" + [(set (match_dup 0) (match_dup 1))] +{ + rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[0]); + if (gbr_mem != NULL_RTX) + operands[0] = replace_equiv_address (operands[0], gbr_mem); + else + FAIL; +}) + +;;------------------------------------------------------------------------------ +;; case instruction for switch statements. + +;; operand 0 is index +;; operand 1 is the minimum bound +;; operand 2 is the maximum bound - minimum bound + 1 +;; operand 3 is CODE_LABEL for the table; +;; operand 4 is the CODE_LABEL to go to if index out of range. 
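+;; A small illustration (hypothetical, not from this file): a dense switch
+;; such as
+;;   int f (int i)
+;;   {
+;;     switch (i)
+;;       {
+;;       case 1: return 10; case 2: return 20; case 3: return 30;
+;;       case 4: return 40; case 5: return 50; default: return -1;
+;;       }
+;;   }
+;; may be expanded through casesi below, with operand 0 holding i, operand 1
+;; the smallest case value (1 here) and operand 4 the out-of-range (default)
+;; label.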
+(define_expand "casesi" + [(match_operand:SI 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" "") + (match_operand 3 "" "") (match_operand 4 "" "")] + "" +{ + rtx reg = gen_reg_rtx (SImode); + rtx reg2 = gen_reg_rtx (SImode); + if (TARGET_SHMEDIA) + { + rtx reg = gen_reg_rtx (DImode); + rtx reg2 = gen_reg_rtx (DImode); + rtx reg3 = gen_reg_rtx (Pmode); + rtx reg4 = gen_reg_rtx (Pmode); + rtx reg5 = gen_reg_rtx (Pmode); + rtx load, test; + + operands[0] = convert_modes (DImode, SImode, operands[0], 0); + operands[1] = convert_modes (DImode, SImode, operands[1], 0); + operands[2] = convert_modes (DImode, SImode, operands[2], 1); + + test = gen_rtx_GT (VOIDmode, operands[1], operands[0]); + emit_jump_insn (gen_cbranchdi4 (test, operands[1], operands[0], + operands[4])); + emit_move_insn (reg, gen_rtx_MINUS (DImode, operands[0], operands[1])); + test = gen_rtx_GTU (VOIDmode, reg, operands[2]); + emit_jump_insn (gen_cbranchdi4 (test, reg, operands[2], operands[4])); + emit_insn (gen_casesi_shift_media (reg2, reg, operands[3])); + emit_move_insn (reg3, gen_datalabel_ref (gen_rtx_LABEL_REF + (Pmode, operands[3]))); + /* Messy: can we subreg to clean this up? */ + if (Pmode == DImode) + load = gen_casesi_load_media (reg4, reg3, reg2, operands[3]); + else + load = gen_casesi_load_media (reg4, + gen_rtx_SUBREG (DImode, reg3, 0), + reg2, operands[3]); + PUT_MODE (SET_SRC (load), Pmode); + emit_insn (load); + /* ??? The following add could be eliminated if we used ptrel. */ + emit_move_insn (reg5, gen_rtx_PLUS (Pmode, reg3, reg4)); + emit_jump_insn (gen_casesi_jump_media (reg5, operands[3])); + emit_barrier (); + DONE; + } + operands[1] = copy_to_mode_reg (SImode, operands[1]); + operands[2] = copy_to_mode_reg (SImode, operands[2]); + /* If optimizing, casesi_worker depends on the mode of the instruction + before label it 'uses' - operands[3]. */ + emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4], + reg)); + emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3])); + if (TARGET_SH2) + emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3])); + else + emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3])); + /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to + operands[3], but to lab. We will fix this up in + machine_dependent_reorg. */ + emit_barrier (); + DONE; +}) + +(define_expand "casesi_0" + [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" "")) + (set (match_dup 4) (minus:SI (match_dup 4) + (match_operand:SI 1 "arith_operand" ""))) + (set (reg:SI T_REG) + (gtu:SI (match_dup 4) + (match_operand:SI 2 "arith_reg_operand" ""))) + (set (pc) + (if_then_else (ne (reg:SI T_REG) + (const_int 0)) + (label_ref (match_operand 3 "" "")) + (pc)))] + "TARGET_SH1" + "") + +;; ??? reload might clobber r0 if we use it explicitly in the RTL before +;; reload; using a R0_REGS pseudo reg is likely to give poor code. +;; So we keep the use of r0 hidden in a R0_REGS clobber until after reload. 
+(define_insn "casesi_worker_0" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:SI 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "=X,1")) + (clobber (match_scratch:SI 4 "=&z,z"))] + "TARGET_SH1" + "#") + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_SH1 && ! TARGET_SH2 && reload_completed" + [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA)) + (parallel [(set (match_dup 0) + (unspec:SI [(reg:SI R0_REG) (match_dup 1) + (label_ref (match_dup 2))] UNSPEC_CASESI)) + (clobber (match_dup 3))]) + (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))] +{ + if (GET_CODE (operands[2]) == CODE_LABEL) + LABEL_NUSES (operands[2])++; +}) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (unspec:SI [(match_operand:SI 1 "register_operand" "") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "")) + (clobber (match_scratch:SI 4 ""))] + "TARGET_SH2 && reload_completed" + [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA)) + (parallel [(set (match_dup 0) + (unspec:SI [(reg:SI R0_REG) (match_dup 1) + (label_ref (match_dup 2))] UNSPEC_CASESI)) + (clobber (match_dup 3))])] +{ + if (GET_CODE (operands[2]) == CODE_LABEL) + LABEL_NUSES (operands[2])++; +}) + +(define_insn "casesi_worker_1" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(reg:SI R0_REG) + (match_operand:SI 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI)) + (clobber (match_scratch:SI 3 "=X,1"))] + "TARGET_SH1" +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return "shll2 %1" "\n" + " mov.l @(r0,%1),%0"; + case HImode: + return "add %1,%1" "\n" + " mov.w @(r0,%1),%0"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return "mov.b @(r0,%1),%0" "\n" + " extu.b %0,%0"; + else + return "mov.b @(r0,%1),%0"; + + default: + gcc_unreachable (); + } +} + [(set_attr "length" "4")]) + +(define_insn "casesi_worker_2" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(reg:SI R0_REG) + (match_operand:SI 1 "register_operand" "0,r") + (label_ref (match_operand 2 "" "")) + (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI)) + (clobber (match_operand:SI 4 "" "=X,1"))] + "TARGET_SH2 && reload_completed && flag_pic" +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return "shll2 %1" "\n" + " add r0,%1" "\n" + " mova %O3,r0" "\n" + " mov.l @(r0,%1),%0"; + case HImode: + return "add %1,%1" "\n" + " add r0,%1" "\n" + " mova %O3,r0" "\n" + " mov.w @(r0,%1),%0"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return "add r0,%1" "\n" + " mova %O3,r0" "\n" + " mov.b @(r0,%1),%0" "\n" + " extu.b %0,%0"; + else + return "add r0,%1" "\n" + " mova %O3,r0" "\n" + " mov.b @(r0,%1),%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "length" "8")]) + +(define_insn "casesi_shift_media" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r") + (unspec:DI 
[(label_ref:DI (match_operand 2 "" ""))] + UNSPEC_CASESI)))] + "TARGET_SHMEDIA" +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[2])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return "shlli %1, 2, %0"; + case HImode: + return "shlli %1, 1, %0"; + case QImode: + if (rtx_equal_p (operands[0], operands[1])) + return ""; + return "add %1, r63, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "arith_media")]) + +(define_insn "casesi_load_media" + [(set (match_operand 0 "any_arith_reg_dest" "=r") + (mem (unspec [(match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r") + (label_ref:DI (match_operand 3 "" ""))] UNSPEC_CASESI)))] + "TARGET_SHMEDIA" +{ + rtx diff_vec = PATTERN (NEXT_INSN (operands[3])); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + switch (GET_MODE (diff_vec)) + { + case SImode: + return "ldx.l %1, %2, %0"; + case HImode: +#if 0 + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return "ldx.uw %1, %2, %0"; +#endif + return "ldx.w %1, %2, %0"; + case QImode: + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + return "ldx.ub %1, %2, %0"; + return "ldx.b %1, %2, %0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "load_media")]) + +(define_expand "simple_return" + [(simple_return)] + "sh_can_use_simple_return_p ()") + +(define_expand "return" + [(return)] + "reload_completed && epilogue_completed" +{ + if (TARGET_SHMEDIA) + { + emit_jump_insn (gen_return_media ()); + DONE; + } + + if (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))) + { + emit_jump_insn (gen_shcompact_return_tramp ()); + DONE; + } +}) + +(define_insn "*<code>_i" + [(any_return)] + "TARGET_SH1 && ! (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie + & CALL_COOKIE_RET_TRAMP (1))) + && reload_completed + && ! sh_cfun_trap_exit_p ()" +{ + if (TARGET_SH2A && (dbr_sequence_length () == 0) + && !current_function_interrupt) + return "rts/n"; + else + return "%@ %#"; +} + [(set_attr "type" "return") + (set_attr "needs_delay_slot" "yes")]) + +;; trapa has no delay slot. 
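+;; An added note (interpretation, not from this file): the preceding return
+;; pattern explicitly excludes sh_cfun_trap_exit_p (), so the variant below
+;; is what remains for functions that exit through a trapa; since trapa has
+;; no delay slot, its output template carries no %# delay-slot marker.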
+(define_insn "*return_trapa" + [(return)] + "TARGET_SH1 && !TARGET_SHCOMPACT + && reload_completed" + "%@" + [(set_attr "type" "return")]) + +(define_expand "shcompact_return_tramp" + [(return)] + "TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))" +{ + rtx reg = gen_rtx_REG (Pmode, R0_REG); + + function_symbol (reg, "__GCC_shcompact_return_trampoline", SFUNC_STATIC); + emit_jump_insn (gen_shcompact_return_tramp_i ()); + DONE; +}) + +(define_insn "shcompact_return_tramp_i" + [(parallel [(return) (use (reg:SI R0_REG))])] + "TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))" + "jmp @r0%#" + [(set_attr "type" "jump_ind") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "return_media_i" + [(parallel [(return) (use (match_operand 0 "target_reg_operand" "k"))])] + "TARGET_SHMEDIA && reload_completed" + "blink %0, r63" + [(set_attr "type" "jump_media")]) + +(define_insn "return_media_rte" + [(return)] + "TARGET_SHMEDIA && reload_completed && current_function_interrupt" + "rte" + [(set_attr "type" "jump_media")]) + +(define_expand "return_media" + [(return)] + "TARGET_SHMEDIA && reload_completed" +{ + int tr_regno = sh_media_register_for_return (); + rtx tr; + + if (current_function_interrupt) + { + emit_jump_insn (gen_return_media_rte ()); + DONE; + } + if (tr_regno < 0) + { + rtx r18 = gen_rtx_REG (Pmode, PR_MEDIA_REG); + + gcc_assert (call_really_used_regs[TR0_REG] && !fixed_regs[TR0_REG]); + tr_regno = TR0_REG; + tr = gen_rtx_REG (Pmode, tr_regno); + emit_move_insn (tr, r18); + } + else + tr = gen_rtx_REG (Pmode, tr_regno); + + emit_jump_insn (gen_return_media_i (tr)); + DONE; +}) + +(define_insn "shcompact_preserve_incoming_args" + [(set (match_operand:SI 0 "register_operand" "+r") + (unspec:SI [(match_dup 0)] UNSPEC_COMPACT_ARGS))] + "TARGET_SHCOMPACT" + "" + [(set_attr "length" "0")]) + +(define_insn "shcompact_incoming_args" + [(set (reg:SI R2_REG) (unspec:SI [(reg:SI R2_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R3_REG) (unspec:SI [(reg:SI R3_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R4_REG) (unspec:SI [(reg:SI R4_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R5_REG) (unspec:SI [(reg:SI R5_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R6_REG) (unspec:SI [(reg:SI R6_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R7_REG) (unspec:SI [(reg:SI R7_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R8_REG) (unspec:SI [(reg:SI R8_REG)] UNSPEC_COMPACT_ARGS)) + (set (reg:SI R9_REG) (unspec:SI [(reg:SI R9_REG)] UNSPEC_COMPACT_ARGS)) + (set (mem:BLK (reg:SI MACL_REG)) + (unspec:BLK [(reg:SI MACH_REG)] UNSPEC_COMPACT_ARGS)) + (use (reg:SI R0_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI MACL_REG)) + (clobber (reg:SI MACH_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT" + "jsr @r0%#" + [(set_attr "needs_delay_slot" "yes")]) + +(define_insn "shmedia_save_restore_regs_compact" + [(set (reg:SI SP_REG) + (plus:SI (reg:SI SP_REG) + (match_operand:SI 0 "immediate_operand" "i"))) + (use (reg:SI R0_REG)) + (clobber (reg:SI PR_REG))] + "TARGET_SHCOMPACT + && (INTVAL (operands[0]) == SHMEDIA_REGS_STACK_ADJUST () + || INTVAL (operands[0]) == - SHMEDIA_REGS_STACK_ADJUST ())" + "jsr @r0%#" + [(set_attr "needs_delay_slot" "yes")]) + +(define_expand "prologue" + [(const_int 0)] + "" +{ + sh_expand_prologue (); + DONE; +}) + +(define_expand "epilogue" + [(return)] + "" +{ + sh_expand_epilogue (false); + if (TARGET_SHMEDIA + || (TARGET_SHCOMPACT + && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1)))) + { + emit_jump_insn (gen_return ()); + DONE; 
+ } +}) + +(define_expand "eh_return" + [(use (match_operand 0 "register_operand" ""))] + "" +{ + rtx ra = operands[0]; + + if (TARGET_SHMEDIA64) + emit_insn (gen_eh_set_ra_di (ra)); + else + emit_insn (gen_eh_set_ra_si (ra)); + + DONE; +}) + +;; Clobber the return address on the stack. We can't expand this +;; until we know where it will be put in the stack frame. + +(define_insn "eh_set_ra_si" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] + UNSPECV_EH_RETURN) + (clobber (match_scratch:SI 1 "=&r"))] + "! TARGET_SHMEDIA64" + "#") + +(define_insn "eh_set_ra_di" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")] + UNSPECV_EH_RETURN) + (clobber (match_scratch:DI 1 "=&r"))] + "TARGET_SHMEDIA64" + "#") + +(define_split + [(unspec_volatile [(match_operand 0 "register_operand" "")] + UNSPECV_EH_RETURN) + (clobber (match_scratch 1 ""))] + "reload_completed" + [(const_int 0)] +{ + sh_set_return_address (operands[0], operands[1]); + DONE; +}) + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0")]) + +;; Define movml instructions for SH2A target. Currently they are +;; used to push and pop all banked registers only. + +(define_insn "movml_push_banked" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus (match_dup 0) (const_int -32))) + (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG)) + (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))] + "TARGET_SH2A && REGNO (operands[0]) == 15" + "movml.l r7,@-r15" + [(set_attr "in_delay_slot" "no")]) + +(define_insn "movml_pop_banked" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus (match_dup 0) (const_int 32))) + (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32)))) + (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28)))) + (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24)))) + (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20)))) + (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16)))) + (set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12)))) + (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8)))) + (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))] + "TARGET_SH2A && REGNO (operands[0]) == 15" + "movml.l @r15+,r7" + [(set_attr "in_delay_slot" "no")]) + +;; ------------------------------------------------------------------------ +;; Scc instructions +;; ------------------------------------------------------------------------ + +(define_insn "movt" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (match_operand:SI 1 "t_reg_operand"))] + "TARGET_SH1" + "movt %0" + [(set_attr "type" "arith")]) + +(define_insn "movrt" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))] + "TARGET_SH2A" + "movrt %0" + [(set_attr "type" "arith")]) + +(define_expand "cstore4_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "sh_float_comparison_operator" + [(match_operand 2 
"logical_operand" "") + (match_operand 3 "cmp_operand" "")]))] + "TARGET_SHMEDIA" +{ + enum machine_mode mode = GET_MODE (operands[2]); + enum rtx_code code = GET_CODE (operands[1]); + bool invert, swap; + if (mode == VOIDmode) + mode = GET_MODE (operands[3]); + if (operands[2] == const0_rtx) + { + if (code == EQ || code == NE) + operands[2] = operands[3], operands[3] = const0_rtx; + } + else + operands[2] = force_reg (mode, operands[2]); + if (operands[3] != const0_rtx) + operands[3] = force_reg (mode, operands[3]); + + switch (code) + { + case GEU: + case GE: + swap = invert = !FLOAT_MODE_P (mode); + break; + + case LEU: + case LE: + swap = FLOAT_MODE_P (mode), invert = !swap; + break; + + case LTU: + case LT: + swap = true, invert = false; + break; + + case GTU: + case GT: + case EQ: + case UNORDERED: + swap = invert = false; + break; + + case NE: + swap = invert = true; + break; + + default: + gcc_unreachable (); + } + + if (swap) + { + rtx tem = operands[2]; + operands[2] = operands[3]; + operands[3] = tem; + code = swap_condition (code); + } + + if (invert) + { + rtx tem = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0]; + code = reverse_condition (code); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]); + emit_insn (gen_cstore4_media (tem, operands[1], + operands[2], operands[3])); + code = EQ; + operands[2] = tem; + operands[3] = const0_rtx; + } + + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]); +}) + +(define_expand "cstoresi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "comparison_operator" + [(match_operand:SI 2 "cmpsi_operand" "") + (match_operand:SI 3 "arith_operand" "")]))] + "TARGET_SH1 || TARGET_SHMEDIA" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (sh_expand_t_scc (operands)) + DONE; + + if (! currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, SImode); + DONE; +}) + +(define_expand "cstoredi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "comparison_operator" + [(match_operand:DI 2 "arith_operand" "") + (match_operand:DI 3 "arith_operand" "")]))] + "TARGET_SH2 || TARGET_SHMEDIA" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (sh_expand_t_scc (operands)) + DONE; + + if (! currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, DImode); + DONE; +}) + +;; Move the complement of the T reg to a reg. +;; On SH2A the movrt insn can be used. +;; On anything else than SH2A this has to be done with multiple instructions. +;; One obvious way would be: +;; cmp/eq ... +;; movt r0 +;; xor #1,r0 +;; +;; However, this puts pressure on r0 in most cases and thus the following is +;; more appealing: +;; cmp/eq ... +;; mov #-1,temp +;; negc temp,dest +;; +;; If the constant -1 can be CSE-ed or lifted out of a loop it effectively +;; becomes a one instruction operation. Moreover, care must be taken that +;; the insn can still be combined with inverted compare and branch code +;; around it. On the other hand, if a function returns the complement of +;; a previous comparison result in the T bit, the xor #1,r0 approach might +;; lead to better code. 
+(define_expand "movnegt" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))] + "TARGET_SH1" +{ + if (TARGET_SH2A) + emit_insn (gen_movrt (operands[0], operands[1])); + else + { + rtx val = force_reg (SImode, gen_int_mode (-1, SImode)); + emit_insn (gen_movrt_negc (operands[0], operands[1], val)); + } + DONE; +}) + +(define_insn "movrt_negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_operand:SI 2 "arith_reg_operand" "r"))] + "TARGET_SH1" + "negc %2,%0" + [(set_attr "type" "arith")]) + +;; The -1 constant will not be CSE-ed for the *movrt_negc pattern, but the +;; pattern can be used by the combine pass. Using a scratch reg for the +;; -1 constant results in slightly better register allocations compared to +;; generating a pseudo reg before reload. +(define_insn_and_split "*movrt_negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1))) + (clobber (match_scratch:SI 2 "=r")) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && ! TARGET_SH2A" + "#" + "&& reload_completed" + [(set (match_dup 2) (const_int -1)) + (parallel + [(set (match_dup 0) (xor:SI (match_dup 1) (const_int 1))) + (set (reg:SI T_REG) (const_int 1)) + (use (match_dup 2))])]) + +;; Store the negated T bit in a reg using r0 and xor. This one doesn't +;; clobber the T bit, which is useful when storing the T bit and the +;; negated T bit in parallel. On SH2A the movrt insn can be used for that. +;; Usually we don't want this insn to be matched, except for cases where the +;; T bit clobber is really not appreciated. Hence the extra use on T_REG. +(define_insn_and_split "movrt_xor" + [(set (match_operand:SI 0 "arith_reg_dest" "=z") + (xor:SI (match_operand:SI 1 "t_reg_operand") (const_int 1))) + (use (reg:SI T_REG))] + "TARGET_SH1 && !TARGET_SH2A" + "#" + "&& reload_completed" + [(set (match_dup 0) (reg:SI T_REG)) + (set (match_dup 0) (xor:SI (match_dup 0) (const_int 1)))]) + +;; Store the T bit and the negated T bit in two regs in parallel. There is +;; no real insn to do that, but specifying this pattern will give combine +;; some opportunities. +(define_insn_and_split "*movt_movrt" + [(parallel [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand:SI 1 "negt_reg_operand")) + (set (match_operand:SI 2 "arith_reg_dest") + (match_operand:SI 3 "t_reg_operand"))])] + "TARGET_SH1" + "#" + "&& 1" + [(const_int 0)] +{ + rtx i = TARGET_SH2A + ? gen_movrt (operands[0], get_t_reg_rtx ()) + : gen_movrt_xor (operands[0], get_t_reg_rtx ()); + + emit_insn (i); + emit_insn (gen_movt (operands[2], get_t_reg_rtx ())); + DONE; +}) + +(define_insn_and_split "*movt_movrt" + [(parallel [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand:SI 1 "t_reg_operand")) + (set (match_operand:SI 2 "arith_reg_dest") + (match_operand:SI 3 "negt_reg_operand"))])] + "TARGET_SH1" + "#" + "&& 1" + [(parallel [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) (match_dup 1))])]) + +;; Use negc to store the T bit in a MSB of a reg in the following way: +;; T = 1: 0x80000000 -> reg +;; T = 0: 0x7FFFFFFF -> reg +;; This works because 0 - 0x80000000 = 0x80000000. +;; +;; This insn must not match again after it has been split into the constant +;; load and negc. This is accomplished by the special negc insn that +;; has a use on the operand. 
+(define_insn_and_split "*mov_t_msb_neg" + [(set (match_operand:SI 0 "arith_reg_dest") + (minus:SI (const_int -2147483648) ;; 0x80000000 + (match_operand 1 "t_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(set (match_dup 2) (const_int -2147483648)) + (parallel [(set (match_dup 0) (minus:SI (neg:SI (match_dup 2)) + (reg:SI T_REG))) + (clobber (reg:SI T_REG)) + (use (match_dup 2))])] +{ + operands[2] = gen_reg_rtx (SImode); +}) + +(define_insn "*mov_t_msb_neg_negc" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")) + (match_operand:SI 2 "t_reg_operand"))) + (clobber (reg:SI T_REG)) + (use (match_dup 1))] + "TARGET_SH1" + "negc %1,%0" + [(set_attr "type" "arith")]) + +;; These are essentially the same as above, but with the inverted T bit. +;; Combine recognizes the split patterns, but does not take them sometimes +;; if the T_REG clobber is specified. Instead it tries to split out the +;; T bit negation. Since these splits are supposed to be taken only by +;; combine, it will see the T_REG clobber of the *mov_t_msb_neg insn, so this +;; should be fine. +(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (plus:SI (match_operand 1 "negt_reg_operand") + (const_int 2147483647)))] ;; 0x7fffffff + "TARGET_SH1 && can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (minus:SI (const_int -2147483648) (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (if_then_else:SI (match_operand 1 "t_reg_operand") + (const_int 2147483647) ;; 0x7fffffff + (const_int -2147483648)))] ;; 0x80000000 + "TARGET_SH1 && can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (minus:SI (const_int -2147483648) (reg:SI T_REG))) + (clobber (reg:SI T_REG))])]) + +;; The *negnegt pattern helps the combine pass to figure out how to fold +;; an explicit double T bit negation. +(define_insn_and_split "*negnegt" + [(set (reg:SI T_REG) + (eq:SI (match_operand 0 "negt_reg_operand" "") (const_int 0)))] + "TARGET_SH1" + "#" + "" + [(const_int 0)]) + +;; Store T bit as all zeros or ones in a reg. +(define_insn "mov_neg_si_t" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (neg:SI (match_operand 1 "t_reg_operand" "")))] + "TARGET_SH1" + "subc %0,%0" + [(set_attr "type" "arith")]) + +;; Store negated T bit as all zeros or ones in a reg. +;; Use the following sequence: +;; subc Rn,Rn ! Rn = Rn - Rn - T; T = T +;; not Rn,Rn ! Rn = 0 - Rn +(define_split + [(set (match_operand:SI 0 "arith_reg_dest" "") + (neg:SI (match_operand 1 "negt_reg_operand" "")))] + "TARGET_SH1" + [(set (match_dup 0) (neg:SI (reg:SI T_REG))) + (set (match_dup 0) (not:SI (match_dup 0)))]) + +;; The *movtt pattern eliminates redundant T bit to T bit moves / tests. +(define_insn_and_split "*movtt" + [(set (reg:SI T_REG) + (eq:SI (match_operand 0 "t_reg_operand" "") (const_int 1)))] + "TARGET_SH1" + "#" + "" + [(const_int 0)]) + +;; Invert the T bit. +;; On SH2A we can use the nott insn. On anything else this must be done with +;; multiple insns like: +;; movt Rn +;; tst Rn,Rn +;; This requires an additional pseudo. The SH specific sh_treg_combine RTL +;; pass will look for this insn. Disallow using it if pseudos can't be +;; created. +(define_insn_and_split "nott" + [(set (reg:SI T_REG) + (xor:SI (match_operand:SI 0 "t_reg_operand") (const_int 1)))] + "TARGET_SH2A || (TARGET_SH1 && can_create_pseudo_p ())" +{ + gcc_assert (TARGET_SH2A); + return "nott"; +} + "! 
TARGET_SH2A && can_create_pseudo_p ()" + [(set (match_dup 0) (reg:SI T_REG)) + (set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))] +{ + operands[0] = gen_reg_rtx (SImode); +}) + +;; Store T bit as MSB in a reg. +;; T = 0: 0x00000000 -> reg +;; T = 1: 0x80000000 -> reg +(define_insn_and_split "*movt_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (mult:SI (match_operand:SI 1 "t_reg_operand") + (const_int -2147483648))) ;; 0xffffffff80000000 + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& 1" + [(set (match_dup 0) (ashift:SI (reg:SI T_REG) (const_int 31)))]) + +;; Store inverted T bit as MSB in a reg. +;; T = 0: 0x80000000 -> reg +;; T = 1: 0x00000000 -> reg +;; On SH2A we can get away without clobbering the T_REG. +(define_insn_and_split "*negt_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand:SI 1 "negt_reg_shl31_operand"))] + "TARGET_SH2A" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_movrt (tmp, get_t_reg_rtx ())); + emit_insn (gen_rotrsi3 (operands[0], tmp, const1_rtx)); + DONE; +}) + +(define_insn_and_split "*negt_msb" + [(set (match_operand:SI 0 "arith_reg_dest") + (match_operand:SI 1 "negt_reg_shl31_operand")) + (clobber (reg:SI T_REG))] + "TARGET_SH1 && !TARGET_SH2A" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + rtx tmp = gen_reg_rtx (SImode); + emit_move_insn (tmp, get_t_reg_rtx ()); + emit_insn (gen_cmpeqsi_t (tmp, const0_rtx)); + emit_insn (gen_rotcr (operands[0], tmp, get_t_reg_rtx ())); + DONE; +}) + +;; The *cset_zero patterns convert optimizations such as +;; "if (test) x = 0;" +;; to +;; "x &= -(test == 0);" +;; back to conditional branch sequences if zero-displacement branches +;; are enabled. +;; FIXME: These patterns can be removed when conditional execution patterns +;; are implemented, since ifcvt will not perform these optimizations if +;; conditional execution is supported. +(define_insn "*cset_zero" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (and:SI (plus:SI (match_operand:SI 1 "t_reg_operand") + (const_int -1)) + (match_operand:SI 2 "arith_reg_operand" "0")))] + "TARGET_SH1 && TARGET_ZDCBRANCH" +{ + return "bf 0f" "\n" + " mov #0,%0" "\n" + "0:"; +} + [(set_attr "type" "arith") ;; poor approximation + (set_attr "length" "4")]) + +(define_insn "*cset_zero" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (if_then_else:SI (match_operand:SI 1 "t_reg_operand") + (match_operand:SI 2 "arith_reg_operand" "0") + (const_int 0)))] + "TARGET_SH1 && TARGET_ZDCBRANCH" +{ + return "bt 0f" "\n" + " mov #0,%0" "\n" + "0:"; +} + [(set_attr "type" "arith") ;; poor approximation + (set_attr "length" "4")]) + +(define_expand "cstoresf4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "sh_float_comparison_operator" + [(match_operand:SF 2 "arith_operand" "") + (match_operand:SF 3 "arith_operand" "")]))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (! 
currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, SFmode); + DONE; +}) + +(define_expand "cstoredf4" + [(set (match_operand:SI 0 "register_operand" "=r") + (match_operator:SI 1 "sh_float_comparison_operator" + [(match_operand:DF 2 "arith_operand" "") + (match_operand:DF 3 "arith_operand" "")]))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SHMEDIA) + { + emit_insn (gen_cstore4_media (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + + if (! currently_expanding_to_rtl) + FAIL; + + sh_emit_compare_and_set (operands, DFmode); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Instructions to cope with inline literal tables +;; ------------------------------------------------------------------------- + +;; 2 byte integer in line +(define_insn "consttable_2" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST2)] + "" +{ + if (operands[1] != const0_rtx) + assemble_integer (operands[0], 2, BITS_PER_UNIT * 2, 1); + return ""; +} + [(set_attr "length" "2") + (set_attr "in_delay_slot" "no")]) + +;; 4 byte integer in line +(define_insn "consttable_4" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST4)] + "" +{ + if (operands[1] != const0_rtx) + { + assemble_integer (operands[0], 4, BITS_PER_UNIT * 4, 1); + mark_symbol_refs_as_used (operands[0]); + } + return ""; +} + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +;; 8 byte integer in line +(define_insn "consttable_8" + [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST8)] + "" +{ + if (operands[1] != const0_rtx) + assemble_integer (operands[0], 8, BITS_PER_UNIT * 8, 1); + return ""; +} + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +;; 4 byte floating point +(define_insn "consttable_sf" + [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST4)] + "" +{ + if (operands[1] != const0_rtx) + { + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]); + assemble_real (d, SFmode, GET_MODE_ALIGNMENT (SFmode)); + } + return ""; +} + [(set_attr "length" "4") + (set_attr "in_delay_slot" "no")]) + +;; 8 byte floating point +(define_insn "consttable_df" + [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g") + (match_operand 1 "" "")] + UNSPECV_CONST8)] + "" +{ + if (operands[1] != const0_rtx) + { + REAL_VALUE_TYPE d; + REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]); + assemble_real (d, DFmode, GET_MODE_ALIGNMENT (DFmode)); + } + return ""; +} + [(set_attr "length" "8") + (set_attr "in_delay_slot" "no")]) + +;; Alignment is needed for some constant tables; it may also be added for +;; Instructions at the start of loops, or after unconditional branches. +;; ??? We would get more accurate lengths if we did instruction +;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used +;; here is too conservative. + +;; align to a two byte boundary +(define_expand "align_2" + [(unspec_volatile [(const_int 1)] UNSPECV_ALIGN)] + "" + "") + +;; Align to a four byte boundary. +;; align_4 and align_log are instructions for the starts of loops, or +;; after unconditional branches, which may take up extra room. +(define_expand "align_4" + [(unspec_volatile [(const_int 2)] UNSPECV_ALIGN)] + "" + "") + +;; Align to a cache line boundary. 
+(define_insn "align_log" + [(unspec_volatile [(match_operand 0 "const_int_operand" "")] UNSPECV_ALIGN)] + "" + "" + [(set_attr "length" "0") + (set_attr "in_delay_slot" "no")]) + +;; Emitted at the end of the literal table, used to emit the +;; 32bit branch labels if needed. +(define_insn "consttable_end" + [(unspec_volatile [(const_int 0)] UNSPECV_CONST_END)] + "" +{ + return output_jump_label_table (); +} + [(set_attr "in_delay_slot" "no")]) + +;; Emitted at the end of the window in the literal table. +(define_insn "consttable_window_end" + [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_WINDOW_END)] + "" + "" + [(set_attr "length" "0") + (set_attr "in_delay_slot" "no")]) + +;; ------------------------------------------------------------------------- +;; Minimum / maximum operations. +;; ------------------------------------------------------------------------- + +;; The SH2A clips.b and clips.w insns do a signed min-max function. If smin +;; and smax standard name patterns are defined, they will be used during +;; initial expansion and combine will then be able to form the actual min-max +;; pattern. +;; The clips.b and clips.w set the SR.CS bit if the value in the register is +;; clipped, but there is currently no way of making use of this information. +;; The only way to read or reset the SR.CS bit is by accessing the SR. +(define_expand "<code>si3" + [(parallel [(set (match_operand:SI 0 "arith_reg_dest") + (SMIN_SMAX:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand 2 "const_int_operand"))) + (clobber (reg:SI T_REG))])] + "TARGET_SH2A" +{ + /* Force the comparison value into a register, because greater-than + comparisons can work only on registers. Combine will be able to pick up + the constant value from the REG_EQUAL note when trying to form a min-max + pattern. */ + operands[2] = force_reg (SImode, operands[2]); +}) + +;; Convert +;; smax (smin (...)) +;; to +;; smin (smax (...)) +(define_insn_and_split "*clips" + [(set (match_operand:SI 0 "arith_reg_dest") + (smax:SI (smin:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand 2 "clips_max_const_int")) + (match_operand 3 "clips_min_const_int")))] + "TARGET_SH2A" + "#" + "&& 1" + [(set (match_dup 0) + (smin:SI (smax:SI (match_dup 1) (match_dup 3)) (match_dup 2)))]) + +(define_insn "*clips" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (smin:SI (smax:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand 2 "clips_min_const_int")) + (match_operand 3 "clips_max_const_int")))] + "TARGET_SH2A" +{ + if (INTVAL (operands[3]) == 127) + return "clips.b %0"; + else if (INTVAL (operands[3]) == 32767) + return "clips.w %0"; + else + gcc_unreachable (); +} + [(set_attr "type" "arith")]) + +;; If the expanded smin or smax patterns were not combined, split them into +;; a compare and branch sequence, because there are no real smin or smax +;; insns. +(define_insn_and_split "*<code>si3" + [(set (match_operand:SI 0 "arith_reg_dest") + (SMIN_SMAX:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "arith_reg_or_0_or_1_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH2A && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] +{ + rtx skip_label = gen_label_rtx (); + emit_move_insn (operands[0], operands[1]); + + rtx cmp_val = operands[2]; + if (satisfies_constraint_M (cmp_val)) + cmp_val = const0_rtx; + + emit_insn (gen_cmpgtsi_t (operands[0], cmp_val)); + emit_jump_insn (<CODE> == SMIN + ? 
gen_branch_false (skip_label) + : gen_branch_true (skip_label)); + + emit_label_after (skip_label, emit_move_insn (operands[0], operands[2])); + DONE; +}) + +;; The SH2A clipu.b and clipu.w insns can be used to implement a min function +;; with a register and a constant. +;; The clipu.b and clipu.w set the SR.CS bit if the value in the register is +;; clipped, but there is currently no way of making use of this information. +;; The only way to read or reset the SR.CS bit is by accessing the SR. +(define_expand "uminsi3" + [(set (match_operand:SI 0 "arith_reg_dest") + (umin:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand 2 "const_int_operand")))] + "TARGET_SH2A" +{ + if (INTVAL (operands[2]) == 1) + { + emit_insn (gen_clipu_one (operands[0], operands[1])); + DONE; + } + else if (! clipu_max_const_int (operands[2], VOIDmode)) + FAIL; +}) + +(define_insn "*clipu" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (umin:SI (match_operand:SI 1 "arith_reg_operand" "0") + (match_operand 2 "clipu_max_const_int")))] + "TARGET_SH2A" +{ + if (INTVAL (operands[2]) == 255) + return "clipu.b %0"; + else if (INTVAL (operands[2]) == 65535) + return "clipu.w %0"; + else + gcc_unreachable (); +} + [(set_attr "type" "arith")]) + +(define_insn_and_split "clipu_one" + [(set (match_operand:SI 0 "arith_reg_dest") + (umin:SI (match_operand:SI 1 "arith_reg_operand") (const_int 1))) + (clobber (reg:SI T_REG))] + "TARGET_SH2A" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + emit_insn (gen_cmpeqsi_t (operands[1], const0_rtx)); + emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); + DONE; +}) + +;; ------------------------------------------------------------------------- +;; Misc +;; ------------------------------------------------------------------------- + +;; String/block move insn. + +(define_expand "movmemsi" + [(parallel [(set (mem:BLK (match_operand:BLK 0)) + (mem:BLK (match_operand:BLK 1))) + (use (match_operand:SI 2 "nonmemory_operand")) + (use (match_operand:SI 3 "immediate_operand")) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI R0_REG))])] + "TARGET_SH1 && ! TARGET_SH5" +{ + if (expand_block_move (operands)) + DONE; + else + FAIL; +}) + +(define_insn "block_move_real" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R0_REG))])] + "TARGET_SH1 && ! TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI R6_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI R6_REG)) + (clobber (reg:SI R0_REG))])] + "TARGET_SH1 && ! 
TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_move_real_i4" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real_i4" + [(parallel [(set (mem:BLK (reg:SI R4_REG)) + (mem:BLK (reg:SI R5_REG))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI R6_REG)) + (clobber (reg:SI PR_REG)) + (clobber (reg:SI T_REG)) + (clobber (reg:SI R4_REG)) + (clobber (reg:SI R5_REG)) + (clobber (reg:SI R6_REG)) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG)) + (clobber (reg:SI R2_REG)) + (clobber (reg:SI R3_REG))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +;; byte compare pattern +;; temp = a ^ b; +;; !((temp & 0xF000) && (temp & 0x0F00) && (temp & 0x00F0) && (temp & 0x000F)) +(define_insn "cmpstr_t" + [(set (reg:SI T_REG) + (eq:SI (and:SI + (and:SI + (and:SI + (zero_extract:SI + (xor:SI (match_operand:SI 0 "arith_reg_operand" "r") + (match_operand:SI 1 "arith_reg_operand" "r")) + (const_int 8) (const_int 0)) + (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1)) + (const_int 8) (const_int 8))) + (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1)) + (const_int 8) (const_int 16))) + (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1)) + (const_int 8) (const_int 24))) + (const_int 0)))] + "TARGET_SH1" + "cmp/str %0,%1" + [(set_attr "type" "mt_group")]) + +(define_expand "cmpstrsi" + [(set (match_operand:SI 0 "register_operand") + (compare:SI (match_operand:BLK 1 "memory_operand") + (match_operand:BLK 2 "memory_operand"))) + (use (match_operand 3 "immediate_operand"))] + "TARGET_SH1 && optimize" +{ + if (! optimize_insn_for_size_p () && sh_expand_cmpstr (operands)) + DONE; + else + FAIL; +}) + +(define_expand "cmpstrnsi" + [(set (match_operand:SI 0 "register_operand") + (compare:SI (match_operand:BLK 1 "memory_operand") + (match_operand:BLK 2 "memory_operand"))) + (use (match_operand:SI 3 "immediate_operand")) + (use (match_operand:SI 4 "immediate_operand"))] + "TARGET_SH1 && optimize" +{ + if (! optimize_insn_for_size_p () && sh_expand_cmpnstr (operands)) + DONE; + else + FAIL; +}) + +(define_expand "strlensi" + [(set (match_operand:SI 0 "register_operand") + (unspec:SI [(match_operand:BLK 1 "memory_operand") + (match_operand:SI 2 "immediate_operand") + (match_operand:SI 3 "immediate_operand")] + UNSPEC_BUILTIN_STRLEN))] + "TARGET_SH1 && optimize" +{ + if (! optimize_insn_for_size_p () && sh_expand_strlen (operands)) + DONE; + else + FAIL; +}) + + +;; ------------------------------------------------------------------------- +;; Floating point instructions. +;; ------------------------------------------------------------------------- + +;; ??? All patterns should have a type attribute. + +(define_expand "movpsi" + [(set (match_operand:PSI 0 "register_operand" "") + (match_operand:PSI 1 "general_movsrc_operand" ""))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "") + +;; The c / m alternative is a fake to guide reload to load directly into +;; fpscr, since reload doesn't know how to use post-increment. 
+;; TARGET_LEGITIMATE_ADDRESS_P guards about bogus addresses before reload, +;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's +;; predicate after reload. +;; The mac_gp type for r/!c might look a bit odd, but it actually schedules +;; like a mac -> gpr move. +(define_insn "fpu_switch" + [(set (match_operand:PSI 0 "general_movdst_operand" "=c,c,r,c,c,r,m,r,<") + (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c,c"))] + "TARGET_SH2E + && (! reload_completed + || true_regnum (operands[0]) != FPSCR_REG + || !MEM_P (operands[1]) + || GET_CODE (XEXP (operands[1], 0)) != PLUS)" + "@ + ! precision stays the same + lds.l %1,fpscr + mov.l %1,%0 + # + lds %1,fpscr + mov %1,%0 + mov.l %1,%0 + sts fpscr,%0 + sts.l fpscr,%0" + [(set_attr "length" "0,2,2,4,2,2,2,2,2") + (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store, + mac_gp,fstore")]) + +(define_peephole2 + [(set (reg:PSI FPSCR_REG) + (mem:PSI (match_operand:SI 0 "register_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && peep2_reg_dead_p (1, operands[0])" + [(const_int 0)] +{ + rtx fpscr, mem, new_insn; + + fpscr = SET_DEST (PATTERN (curr_insn)); + mem = SET_SRC (PATTERN (curr_insn)); + mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0])); + + new_insn = emit_insn (gen_fpu_switch (fpscr, mem)); + add_reg_note (new_insn, REG_INC, operands[0]); + DONE; +}) + +(define_split + [(set (reg:PSI FPSCR_REG) + (mem:PSI (match_operand:SI 0 "register_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) + && (flag_peephole2 ? epilogue_completed : reload_completed)" + [(const_int 0)] +{ + rtx fpscr, mem, new_insn; + + fpscr = SET_DEST (PATTERN (curr_insn)); + mem = SET_SRC (PATTERN (curr_insn)); + mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0])); + + new_insn = emit_insn (gen_fpu_switch (fpscr, mem)); + add_reg_note (new_insn, REG_INC, operands[0]); + + if (!find_regno_note (curr_insn, REG_DEAD, true_regnum (operands[0]))) + emit_insn (gen_addsi3 (operands[0], operands[0], GEN_INT (-4))); + DONE; +}) + +;; ??? This uses the fp unit, but has no type indicating that. +;; If we did that, this would either give a bogus latency or introduce +;; a bogus FIFO constraint. +;; Since this insn is currently only used for prologues/epilogues, +;; it is probably best to claim no function unit, which matches the +;; current setting. +(define_insn "toggle_sz" + [(set (reg:PSI FPSCR_REG) + (xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fschg" + [(set_attr "type" "fpscr_toggle") (set_attr "fp_set" "unknown")]) + +;; There's no way we can use it today, since optimize mode switching +;; doesn't enable us to know from which mode we're switching to the +;; mode it requests, to tell whether we can use a relative mode switch +;; (like toggle_pr) or an absolute switch (like loading fpscr from +;; memory). +(define_insn "toggle_pr" + [(set (reg:PSI FPSCR_REG) + (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))] + "TARGET_SH4A_FP && ! 
TARGET_FPU_SINGLE" + "fpchg" + [(set_attr "type" "fpscr_toggle")]) + +(define_expand "addsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand") + (plus:SF (match_operand:SF 1 "fp_arith_reg_operand") + (match_operand:SF 2 "fp_arith_reg_operand")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + expand_sf_binop (&gen_addsf3_i, operands); + DONE; + } +}) + +(define_insn "*addsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fadd.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_insn_and_split "unary_sf_op" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_select:V2SF + (vec_concat:V2SF + (vec_select:SF + (match_dup 0) + (parallel [(not:BI (match_operand 3 "const_int_operand" "n"))])) + (match_operator:SF 2 "unary_float_operator" + [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f") + (parallel [(match_operand 4 + "const_int_operand" "n")]))])) + (parallel [(not:BI (match_dup 3)) (match_dup 3)])))] + "TARGET_SHMEDIA_FPU" + "#" + "TARGET_SHMEDIA_FPU && reload_completed" + [(set (match_dup 5) (match_dup 6))] +{ + int endian = TARGET_LITTLE_ENDIAN ? 0 : 1; + rtx op1 = gen_rtx_REG (SFmode, + (true_regnum (operands[1]) + + (INTVAL (operands[4]) ^ endian))); + + operands[7] = gen_rtx_REG (SFmode, + (true_regnum (operands[0]) + + (INTVAL (operands[3]) ^ endian))); + operands[6] = gen_rtx_fmt_e (GET_CODE (operands[2]), SFmode, op1); +} + [(set_attr "type" "fparith_media")]) + +(define_insn_and_split "binary_sf_op0" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (match_operator:SF 3 "binary_float_operator" + [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f") + (parallel [(const_int 0)])) + (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f") + (parallel [(const_int 0)]))]) + (vec_select:SF + (match_dup 0) + (parallel [(const_int 1)]))))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(set (match_dup 4) (match_dup 5))] +{ + int endian = TARGET_LITTLE_ENDIAN ? 0 : 1; + rtx op1 = gen_rtx_REG (SFmode, + true_regnum (operands[1]) + endian); + rtx op2 = gen_rtx_REG (SFmode, + true_regnum (operands[2]) + endian); + + operands[4] = gen_rtx_REG (SFmode, + true_regnum (operands[0]) + endian); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2); +} + [(set_attr "type" "fparith_media")]) + +(define_insn_and_split "binary_sf_op1" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (vec_select:SF + (match_dup 0) + (parallel [(const_int 0)])) + (match_operator:SF 3 "binary_float_operator" + [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f") + (parallel [(const_int 1)])) + (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f") + (parallel [(const_int 1)]))])))] + "TARGET_SHMEDIA_FPU" + "#" + "&& reload_completed" + [(set (match_dup 4) (match_dup 5))] +{ + int endian = TARGET_LITTLE_ENDIAN ? 
0 : 1; + rtx op1 = gen_rtx_REG (SFmode, true_regnum (operands[1]) + (1 ^ endian)); + rtx op2 = gen_rtx_REG (SFmode, true_regnum (operands[2]) + (1 ^ endian)); + + operands[4] = gen_rtx_REG (SFmode, true_regnum (operands[0]) + (1 ^ endian)); + operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2); +} + [(set_attr "type" "fparith_media")]) + +(define_insn "addsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fadd %2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_expand "subsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "") + (match_operand:SF 2 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + expand_sf_binop (&gen_subsf3_i, operands); + DONE; + } +}) + +(define_insn "*subsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsub.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_insn "subsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fsub %2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_expand "mulsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "") + (match_operand:SF 2 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + emit_insn (gen_mulsf3_i (operands[0], operands[1], operands[2], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*mulsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fmul.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_insn "mulsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fmul %2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +;; FMA (fused multiply-add) patterns +(define_expand "fmasf4" + [(set (match_operand:SF 0 "fp_arith_reg_operand") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand") + (match_operand:SF 2 "fp_arith_reg_operand") + (match_operand:SF 3 "fp_arith_reg_operand")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + emit_sf_insn (gen_fmasf4_i (operands[0], operands[1], operands[2], + operands[3], get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "fmasf4_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "w") + (match_operand:SF 2 "fp_arith_reg_operand" "f") + (match_operand:SF 3 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fmac %1,%2,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "fmasf4_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + 
(fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f") + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +;; For some cases such as 'a * b + a' the FMA pattern is not generated by +;; previous transformations. If FMA is generally allowed, let the combine +;; pass utilize it. +(define_insn_and_split "*fmasf4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand"))] + "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac %1,%2,%0" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (fma:SF (match_dup 1) (match_dup 2) (match_dup 3))) + (use (match_dup 4))])] +{ + /* Change 'b * a + a' into 'a * b + a'. + This is better for register allocation. */ + if (REGNO (operands[2]) == REGNO (operands[3])) + { + rtx tmp = operands[1]; + operands[1] = operands[2]; + operands[2] = tmp; + } +} + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "*fmasf4_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")) + (match_operand:SF 3 "fp_arith_reg_operand" "0")))] + "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF" + "fmac.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_expand "divsf3" + [(set (match_operand:SF 0 "fp_arith_reg_operand") + (div:SF (match_operand:SF 1 "fp_arith_reg_operand") + (match_operand:SF 2 "fp_arith_reg_operand")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + expand_sf_binop (&gen_divsf3_i, operands); + DONE; + } +}) + +(define_insn "*divsf3_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (div:SF (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fdiv.s %1, %2, %0" + [(set_attr "type" "fdiv_media")]) + +(define_insn "divsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_dest" "=f") + (div:SF (match_operand:SF 1 "fp_arith_reg_operand" "0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fdiv %2,%0" + [(set_attr "type" "fdiv") + (set_attr "fp_mode" "single")]) + +(define_insn "floatdisf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:DI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.qs %1, %0" + [(set_attr "type" "fpconv_media")]) + +(define_expand "floatsisf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (float:SF (match_operand:SI 1 "fpul_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_SINGLE) + { + emit_sf_insn (gen_floatsisf2_i4 (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*floatsisf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:SI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.ls %1, %0" + [(set_attr "type" "fpconv_media")]) + +(define_insn "floatsisf2_i4" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:SI 1 "fpul_operand" "y"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || 
TARGET_SH2A_SINGLE)" + "float %1,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "single")]) + +(define_insn "*floatsisf2_ie" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float:SF (match_operand:SI 1 "fpul_operand" "y")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "float %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "fix_truncsfdi2" + [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f") + (fix:DI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.sq %1, %0" + [(set_attr "type" "fpconv_media")]) + +(define_expand "fix_truncsfsi2" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_SINGLE) + { + emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*fix_truncsfsi2_media" + [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.sl %1, %0" + [(set_attr "type" "fpconv_media")]) + +(define_insn "fix_truncsfsi2_i4" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "ftrc %1,%0" + [(set_attr "type" "ftrc_s") + (set_attr "fp_mode" "single")]) + +;; ??? This pattern is used nowhere. fix_truncsfsi2 always expands to +;; fix_truncsfsi2_i4. +;; (define_insn "fix_truncsfsi2_i4_2" +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) +;; (use (reg:PSI FPSCR_REG)) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; "#" +;; [(set_attr "length" "4") +;; (set_attr "fp_mode" "single")]) + +;;(define_split +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) +;; (use (match_operand:PSI 2 "fpscr_operand" "c")) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1))) +;; (use (match_dup 2))]) +;; (set (match_dup 0) (reg:SI FPUL_REG))]) + +(define_insn "*fixsfsi" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "ftrc %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "cmpgtsf_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/gt %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "cmpeqsf_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/eq %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "ieee_ccmpeqsf_t" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f"))))] + "TARGET_SH2E && TARGET_IEEE && ! 
(TARGET_SH4 || TARGET_SH2A_SINGLE)" +{ + return output_ieee_ccmpeq (insn, operands); +} + [(set_attr "length" "4")]) + + +(define_insn "cmpgtsf_t_i4" + [(set (reg:SI T_REG) + (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/gt %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "cmpeqsf_t_i4" + [(set (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_SINGLE)" + "fcmp/eq %1,%0" + [(set_attr "type" "fp_cmp") + (set_attr "fp_mode" "single")]) + +(define_insn "*ieee_ccmpeqsf_t_4" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f") + (match_operand:SF 1 "fp_arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_SINGLE)" +{ + return output_ieee_ccmpeq (insn, operands); +} + [(set_attr "length" "4") + (set_attr "fp_mode" "single")]) + +(define_insn "cmpeqsf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpeq.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgtsf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpgt.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgesf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (ge:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpge.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpunsf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (unordered:SI (match_operand:SF 1 "fp_arith_reg_operand" "f") + (match_operand:SF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpun.s %1, %2, %0" + [(set_attr "type" "fcmp_media")]) + +(define_expand "cbranchsf4" + [(set (pc) + (if_then_else (match_operator 0 "sh_float_comparison_operator" + [(match_operand:SF 1 "arith_operand" "") + (match_operand:SF 2 "arith_operand" "")]) + (match_operand 3 "" "") + (pc)))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SHMEDIA) + emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2], + operands[3])); + else + sh_emit_compare_and_branch (operands, SFmode); + DONE; +}) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + expand_sf_unop (&gen_negsf2_i, operands); + DONE; + } +}) + +(define_insn "*negsf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fneg.s %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "negsf2_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fneg %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" 
"single")]) + +(define_expand "sqrtsf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] + "TARGET_SH3E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH3E) + { + expand_sf_unop (&gen_sqrtsf2_i, operands); + DONE; + } +}) + +(define_insn "*sqrtsf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsqrt.s %1, %0" + [(set_attr "type" "fdiv_media")]) + +(define_insn "sqrtsf2_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH3E" + "fsqrt %0" + [(set_attr "type" "fdiv") + (set_attr "fp_mode" "single")]) + +(define_insn "rsqrtsf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (div:SF (match_operand:SF 1 "immediate_operand" "i") + (sqrt:SF (match_operand:SF 2 "fp_arith_reg_operand" "0")))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_FPU_ANY && TARGET_FSRRA + && operands[1] == CONST1_RTX (SFmode)" + "fsrra %0" + [(set_attr "type" "fsrra") + (set_attr "fp_mode" "single")]) + +;; When the sincos pattern is defined, the builtin functions sin and cos +;; will be expanded to the sincos pattern and one of the output values will +;; remain unused. +(define_expand "sincossf3" + [(set (match_operand:SF 0 "nonimmediate_operand") + (unspec:SF [(match_operand:SF 2 "fp_arith_reg_operand")] UNSPEC_FCOSA)) + (set (match_operand:SF 1 "nonimmediate_operand") + (unspec:SF [(match_dup 2)] UNSPEC_FSINA))] + "TARGET_FPU_ANY && TARGET_FSCA" +{ + rtx scaled = gen_reg_rtx (SFmode); + rtx truncated = gen_reg_rtx (SImode); + rtx fsca = gen_reg_rtx (V2SFmode); + rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ()); + + emit_sf_insn (gen_mulsf3 (scaled, operands[2], scale_reg)); + emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled)); + emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (), + get_fpscr_rtx ())); + + emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4)); + emit_move_insn (operands[1], gen_rtx_SUBREG (SFmode, fsca, 0)); + DONE; +}) + +(define_insn_and_split "fsca" + [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f") + (vec_concat:V2SF + (unspec:SF [(mult:SF + (float:SF (match_operand:SI 1 "fpul_fsca_operand" "y")) + (match_operand:SF 2 "fsca_scale_factor" "i")) + ] UNSPEC_FSINA) + (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2)) + ] UNSPEC_FCOSA))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_FPU_ANY && TARGET_FSCA" + "fsca fpul,%d0" + "&& !fpul_operand (operands[1], SImode)" + [(const_int 0)] +{ + /* If operands[1] is something like (fix:SF (float:SF (reg:SI))) reduce it + to a simple reg, otherwise reload will have trouble reloading the + pseudo into fpul. 
*/ + rtx x = XEXP (operands[1], 0); + while (x != NULL_RTX && !fpul_operand (x, SImode)) + { + gcc_assert (GET_CODE (x) == FIX || GET_CODE (x) == FLOAT); + x = XEXP (x, 0); + } + + gcc_assert (x != NULL_RTX && fpul_operand (x, SImode)); + emit_insn (gen_fsca (operands[0], x, operands[2], operands[3])); + DONE; +} + [(set_attr "type" "fsca") + (set_attr "fp_mode" "single")]) + +(define_expand "abssf2" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))] + "TARGET_SH2E || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH2E) + { + expand_sf_unop (&gen_abssf2_i, operands); + DONE; + } +}) + +(define_insn "*abssf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fabs.s %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "abssf2_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH2E" + "fabs %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "single")]) + +(define_expand "adddf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_adddf3_i, operands); + DONE; + } +}) + +(define_insn "*adddf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fadd.d %1, %2, %0" + [(set_attr "type" "dfparith_media")]) + +(define_insn "adddf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fadd %2,%0" + [(set_attr "type" "dfp_arith") + (set_attr "fp_mode" "double")]) + +(define_expand "subdf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_subdf3_i, operands); + DONE; + } +}) + +(define_insn "*subdf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsub.d %1, %2, %0" + [(set_attr "type" "dfparith_media")]) + +(define_insn "subdf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fsub %2,%0" + [(set_attr "type" "dfp_arith") + (set_attr "fp_mode" "double")]) + +(define_expand "muldf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_muldf3_i, operands); + DONE; + } +}) + 
+(define_insn "*muldf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fmul.d %1, %2, %0" + [(set_attr "type" "dfmul_media")]) + +(define_insn "muldf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fmul %2,%0" + [(set_attr "type" "dfp_mul") + (set_attr "fp_mode" "double")]) + +(define_expand "divdf3" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "") + (match_operand:DF 2 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_binop (&gen_divdf3_i, operands); + DONE; + } +}) + +(define_insn "*divdf3_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fdiv.d %1, %2, %0" + [(set_attr "type" "dfdiv_media")]) + +(define_insn "divdf3_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "0") + (match_operand:DF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fdiv %2,%0" + [(set_attr "type" "dfdiv") + (set_attr "fp_mode" "double")]) + +(define_insn "floatdidf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float:DF (match_operand:DI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.qd %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_expand "floatsidf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (float:DF (match_operand:SI 1 "fpul_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_floatsidf2_i (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*floatsidf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float:DF (match_operand:SI 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "float.ld %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "floatsidf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float:DF (match_operand:SI 1 "fpul_operand" "y"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "float %1,%0" + [(set_attr "type" "dfp_conv") + (set_attr "fp_mode" "double")]) + +(define_insn "fix_truncdfdi2" + [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f") + (fix:DI (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.dq %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "fpul_operand" "") + (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_fix_truncdfsi2_i (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*fix_truncdfsi2_media" + [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f") + (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "ftrc.dl 
%1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "fix_truncdfsi2_i" + [(set (match_operand:SI 0 "fpul_operand" "=y") + (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "ftrc %1,%0" + [(set_attr "type" "dfp_conv") + (set_attr "dfp_comp" "no") + (set_attr "fp_mode" "double")]) + +;; ??? This pattern is used nowhere. fix_truncdfsi2 always expands to +;; fix_truncdfsi2_i. +;; (define_insn "fix_truncdfsi2_i4" +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) +;; (use (match_operand:PSI 2 "fpscr_operand" "c")) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; "#" +;; [(set_attr "length" "4") +;; (set_attr "fp_mode" "double")]) +;; +;; (define_split +;; [(set (match_operand:SI 0 "arith_reg_operand" "=r") +;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) +;; (use (match_operand:PSI 2 "fpscr_operand" "c")) +;; (clobber (reg:SI FPUL_REG))] +;; "TARGET_SH4" +;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1))) +;; (use (match_dup 2))]) +;; (set (match_dup 0) (reg:SI FPUL_REG))]) + +(define_insn "cmpgtdf_t" + [(set (reg:SI T_REG) + (gt:SI (match_operand:DF 0 "fp_arith_reg_operand" "f") + (match_operand:DF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcmp/gt %1,%0" + [(set_attr "type" "dfp_cmp") + (set_attr "fp_mode" "double")]) + +(define_insn "cmpeqdf_t" + [(set (reg:SI T_REG) + (eq:SI (match_operand:DF 0 "fp_arith_reg_operand" "f") + (match_operand:DF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcmp/eq %1,%0" + [(set_attr "type" "dfp_cmp") + (set_attr "fp_mode" "double")]) + +(define_insn "*ieee_ccmpeqdf_t" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (eq:SI (match_operand:DF 0 "fp_arith_reg_operand" "f") + (match_operand:DF 1 "fp_arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_DOUBLE)" +{ + return output_ieee_ccmpeq (insn, operands); +} + [(set_attr "length" "4") + (set_attr "fp_mode" "double")]) + +(define_insn "cmpeqdf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpeq.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgtdf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (gt:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpgt.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpgedf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (ge:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpge.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_insn "cmpundf_media" + [(set (match_operand:SI 0 "register_operand" "=r") + (unordered:SI (match_operand:DF 1 "fp_arith_reg_operand" "f") + (match_operand:DF 2 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcmpun.d %1,%2,%0" + [(set_attr "type" "fcmp_media")]) + +(define_expand "cbranchdf4" + [(set (pc) + (if_then_else (match_operator 0 "sh_float_comparison_operator" + [(match_operand:DF 1 "arith_operand" "") + (match_operand:DF 2 
"arith_operand" "")]) + (match_operand 3 "" "") + (pc)))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SHMEDIA) + emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2], + operands[3])); + else + sh_emit_compare_and_branch (operands, DFmode); + DONE; +}) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand") + (neg:DF (match_operand:DF 1 "fp_arith_reg_operand")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_unop (&gen_negdf2_i, operands); + DONE; + } +}) + +(define_insn "*negdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fneg.d %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "negdf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fneg %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "double")]) + +(define_expand "sqrtdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand") + (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_unop (&gen_sqrtdf2_i, operands); + DONE; + } +}) + +(define_insn "*sqrtdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fsqrt.d %1, %0" + [(set_attr "type" "dfdiv_media")]) + +(define_insn "sqrtdf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fsqrt %0" + [(set_attr "type" "dfdiv") + (set_attr "fp_mode" "double")]) + +(define_expand "absdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand") + (abs:DF (match_operand:DF 1 "fp_arith_reg_operand")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + expand_df_unop (&gen_absdf2_i, operands); + DONE; + } +}) + +(define_insn "*absdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fabs.d %1, %0" + [(set_attr "type" "fmove_media")]) + +(define_insn "absdf2_i" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fabs %0" + [(set_attr "type" "fmove") + (set_attr "fp_mode" "double")]) + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "") + (float_extend:DF (match_operand:SF 1 "fpul_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_extendsfdf2_i4 (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*extendsfdf2_media" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float_extend:DF (match_operand:SF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcnv.sd %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "extendsfdf2_i4" + [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f") + (float_extend:DF 
(match_operand:SF 1 "fpul_operand" "y"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcnvsd %1,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "double")]) + +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "fpul_operand" "") + (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU" +{ + if (TARGET_SH4 || TARGET_SH2A_DOUBLE) + { + emit_df_insn (gen_truncdfsf2_i4 (operands[0], operands[1], + get_fpscr_rtx ())); + DONE; + } +}) + +(define_insn "*truncdfsf2_media" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))] + "TARGET_SHMEDIA_FPU" + "fcnv.ds %1, %0" + [(set_attr "type" "dfpconv_media")]) + +(define_insn "truncdfsf2_i4" + [(set (match_operand:SF 0 "fpul_operand" "=y") + (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "(TARGET_SH4 || TARGET_SH2A_DOUBLE)" + "fcnvds %1,%0" + [(set_attr "type" "fp") + (set_attr "fp_mode" "double")]) + +;; ------------------------------------------------------------------------- +;; Bit field extract patterns. +;; ------------------------------------------------------------------------- + +;; These give better code for packed bitfields, because they allow +;; auto-increment addresses to be generated. + +(define_expand "insv" + [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "") + (match_operand:SI 1 "immediate_operand" "") + (match_operand:SI 2 "immediate_operand" "")) + (match_operand:SI 3 "general_operand" ""))] + "TARGET_SH1 && TARGET_BIG_ENDIAN" +{ + rtx addr_target, orig_address, shift_reg, qi_val; + HOST_WIDE_INT bitsize, size, v = 0; + rtx x = operands[3]; + + if (TARGET_SH2A && TARGET_BITOPS + && (satisfies_constraint_Sbw (operands[0]) + || satisfies_constraint_Sbv (operands[0])) + && satisfies_constraint_M (operands[1]) + && satisfies_constraint_K03 (operands[2])) + { + if (satisfies_constraint_N (operands[3])) + { + emit_insn (gen_bclr_m2a (operands[0], operands[2])); + DONE; + } + else if (satisfies_constraint_M (operands[3])) + { + emit_insn (gen_bset_m2a (operands[0], operands[2])); + DONE; + } + else if ((REG_P (operands[3]) && REGNO (operands[3]) == T_REG) + && satisfies_constraint_M (operands[1])) + { + emit_insn (gen_bst_m2a (operands[0], operands[2])); + DONE; + } + else if (REG_P (operands[3]) + && satisfies_constraint_M (operands[1])) + { + emit_insn (gen_bld_reg (operands[3], const0_rtx)); + emit_insn (gen_bst_m2a (operands[0], operands[2])); + DONE; + } + } + /* ??? expmed doesn't care for non-register predicates. */ + if (! memory_operand (operands[0], VOIDmode) + || ! immediate_operand (operands[1], VOIDmode) + || ! immediate_operand (operands[2], VOIDmode) + || ! general_operand (x, VOIDmode)) + FAIL; + /* If this isn't a 16 / 24 / 32 bit field, or if + it doesn't start on a byte boundary, then fail. 
*/ + bitsize = INTVAL (operands[1]); + if (bitsize < 16 || bitsize > 32 || bitsize % 8 != 0 + || (INTVAL (operands[2]) % 8) != 0) + FAIL; + + size = bitsize / 8; + orig_address = XEXP (operands[0], 0); + shift_reg = gen_reg_rtx (SImode); + if (CONST_INT_P (x)) + { + v = INTVAL (x); + qi_val = force_reg (QImode, GEN_INT (trunc_int_for_mode (v, QImode))); + } + else + { + emit_insn (gen_movsi (shift_reg, operands[3])); + qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3); + } + addr_target = copy_addr_to_reg (plus_constant (Pmode, + orig_address, size - 1)); + + operands[0] = replace_equiv_address (operands[0], addr_target); + emit_insn (gen_movqi (operands[0], qi_val)); + + while (size -= 1) + { + if (CONST_INT_P (x)) + qi_val + = force_reg (QImode, GEN_INT (trunc_int_for_mode (v >>= 8, QImode))); + else + { + emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8))); + qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3); + } + emit_insn (gen_addsi3 (addr_target, addr_target, constm1_rtx)); + emit_insn (gen_movqi (operands[0], qi_val)); + } + + DONE; +}) + +(define_insn "movua" + [(set (match_operand:SI 0 "register_operand" "=z") + (unspec:SI [(match_operand:BLK 1 "unaligned_load_operand" "Sua>")] + UNSPEC_MOVUA))] + "TARGET_SH4A_ARCH" + "movua.l %1,%0" + [(set_attr "type" "movua")]) + +;; We shouldn't need this, but cse replaces increments with references +;; to other regs before flow has a chance to create post_inc +;; addressing modes, and only postreload's cse_move2add brings the +;; increments back to a usable form. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" "")) + (const_int 32) (const_int 0))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (mem:SI (post_inc:SI + (match_operand:SI 1 "register_operand" ""))) + (const_int 32) (const_int 0)))] + "") + +(define_expand "extv" + [(set (match_operand:SI 0 "register_operand" "") + (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "TARGET_SH4A_ARCH || TARGET_SH2A" +{ + if (TARGET_SH2A && TARGET_BITOPS + && (satisfies_constraint_Sbw (operands[1]) + || satisfies_constraint_Sbv (operands[1])) + && satisfies_constraint_M (operands[2]) + && satisfies_constraint_K03 (operands[3])) + { + emit_insn (gen_bldsign_m2a (operands[1], operands[3])); + if (REGNO (operands[0]) != T_REG) + emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG))); + DONE; + } + if (TARGET_SH4A_ARCH + && INTVAL (operands[2]) == 32 + && INTVAL (operands[3]) == 0 + && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32) + { + rtx src = adjust_address (operands[1], BLKmode, 0); + set_mem_size (src, 4); + emit_insn (gen_movua (operands[0], src)); + DONE; + } + + FAIL; +}) + +(define_expand "extzv" + [(set (match_operand:SI 0 "register_operand" "") + (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "TARGET_SH4A_ARCH || TARGET_SH2A" +{ + if (TARGET_SH2A && TARGET_BITOPS + && (satisfies_constraint_Sbw (operands[1]) + || satisfies_constraint_Sbv (operands[1])) + && satisfies_constraint_M (operands[2]) + && satisfies_constraint_K03 (operands[3])) + { + emit_insn (gen_bld_m2a (operands[1], operands[3])); + if (REGNO (operands[0]) != 
T_REG) + emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG))); + DONE; + } + if (TARGET_SH4A_ARCH + && INTVAL (operands[2]) == 32 + && INTVAL (operands[3]) == 0 + && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32) + { + rtx src = adjust_address (operands[1], BLKmode, 0); + set_mem_size (src, 4); + emit_insn (gen_movua (operands[0], src)); + DONE; + } + + FAIL; +}) + +;; SH2A instructions for bitwise operations. +;; FIXME: Convert multiple instruction insns to insn_and_split. +;; FIXME: Use iterators to fold at least and,xor,or insn variations. + +;; Clear a bit in a memory location. +(define_insn "bclr_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (and:QI + (not:QI (ashift:QI (const_int 1) + (match_operand:QI 1 "const_int_operand" "K03,K03"))) + (match_dup 0)))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bclr.b %1,%0 + bclr.b %1,@(0,%t0)" +[(set_attr "length" "4,4")]) + +(define_insn "bclrmem_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (and:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" "Psz,Psz")))] + "TARGET_SH2A && satisfies_constraint_Psz (operands[1]) && TARGET_BITOPS" + "@ + bclr.b %W1,%0 + bclr.b %W1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Set a bit in a memory location. +(define_insn "bset_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (ior:QI + (ashift:QI (const_int 1) + (match_operand:QI 1 "const_int_operand" "K03,K03")) + (match_dup 0)))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bset.b %1,%0 + bset.b %1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +(define_insn "bsetmem_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv") + (ior:QI (match_dup 0) + (match_operand:QI 1 "const_int_operand" "Pso,Pso")))] + "TARGET_SH2A && satisfies_constraint_Pso (operands[1]) && TARGET_BITOPS" + "@ + bset.b %V1,%0 + bset.b %V1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;;; Transfer the contents of the T bit to a specified bit of memory. +(define_insn "bst_m2a" + [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,m") + (if_then_else (eq (reg:SI T_REG) (const_int 0)) + (and:QI + (not:QI (ashift:QI (const_int 1) + (match_operand:QI 1 "const_int_operand" "K03,K03"))) + (match_dup 0)) + (ior:QI + (ashift:QI (const_int 1) (match_dup 1)) + (match_dup 0))))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bst.b %1,%0 + bst.b %1,@(0,%t0)" + [(set_attr "length" "4")]) + +;; Store a specified bit of memory in the T bit. +(define_insn "bld_m2a" + [(set (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03")))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bld.b %1,%0 + bld.b %1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Store a specified bit of memory in the T bit. +(define_insn "bldsign_m2a" + [(set (reg:SI T_REG) + (sign_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03")))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bld.b %1,%0 + bld.b %1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +;; Store a specified bit of the LSB 8 bits of a register in the T bit. 
+(define_insn "bld_reg" + [(set (reg:SI T_REG) + (zero_extract:SI (match_operand:SI 0 "arith_reg_operand" "r") + (const_int 1) + (match_operand 1 "const_int_operand" "K03")))] + "TARGET_SH2A && satisfies_constraint_K03 (operands[1])" + "bld %1,%0") + +(define_insn "*bld_regqi" + [(set (reg:SI T_REG) + (zero_extract:SI (match_operand:QI 0 "arith_reg_operand" "r") + (const_int 1) + (match_operand 1 "const_int_operand" "K03")))] + "TARGET_SH2A && satisfies_constraint_K03 (operands[1])" + "bld %1,%0") + +;; Take logical and of a specified bit of memory with the T bit and +;; store its result in the T bit. +(define_insn "band_m2a" + [(set (reg:SI T_REG) + (and:SI (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03"))))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + band.b %1,%0 + band.b %1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +(define_insn "bandreg_m2a" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (and:SI (zero_extract:SI + (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 2 "const_int_operand" "K03,K03")) + (match_operand:SI 3 "register_operand" "r,r")))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[2])" +{ + static const char* alt[] = + { + "band.b %2,%1" "\n" + " movt %0", + + "band.b %2,@(0,%t1)" "\n" + " movt %0" + }; + return alt[which_alternative]; +} + [(set_attr "length" "6,6")]) + +;; Take logical or of a specified bit of memory with the T bit and +;; store its result in the T bit. +(define_insn "bor_m2a" + [(set (reg:SI T_REG) + (ior:SI (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03"))))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bor.b %1,%0 + bor.b %1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +(define_insn "borreg_m2a" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (zero_extract:SI + (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 2 "const_int_operand" "K03,K03")) + (match_operand:SI 3 "register_operand" "=r,r")))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[2])" +{ + static const char* alt[] = + { + "bor.b %2,%1" "\n" + " movt %0", + + "bor.b %2,@(0,%t1)" "\n" + " movt %0" + }; + return alt[which_alternative]; +} + [(set_attr "length" "6,6")]) + +;; Take exclusive or of a specified bit of memory with the T bit and +;; store its result in the T bit. 
+(define_insn "bxor_m2a" + [(set (reg:SI T_REG) + (xor:SI (reg:SI T_REG) + (zero_extract:SI + (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m") + (const_int 1) + (match_operand 1 "const_int_operand" "K03,K03"))))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])" + "@ + bxor.b %1,%0 + bxor.b %1,@(0,%t0)" + [(set_attr "length" "4,4")]) + +(define_insn "bxorreg_m2a" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (zero_extract:SI + (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv") + (const_int 1) + (match_operand 2 "const_int_operand" "K03,K03")) + (match_operand:SI 3 "register_operand" "=r,r")))] + "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[2])" +{ + static const char* alt[] = + { + "bxor.b %2,%1" "\n" + " movt %0", + + "bxor.b %2,@(0,%t1)" "\n" + " movt %0" + }; + return alt[which_alternative]; +} + [(set_attr "length" "6,6")]) + +;; ------------------------------------------------------------------------- +;; Peepholes +;; ------------------------------------------------------------------------- +;; This matches cases where the bit in a memory location is set. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand"))) + (set (match_dup 0) + (ior:SI (match_dup 0) + (match_operand:SI 2 "const_int_operand"))) + (set (match_dup 1) + (match_operand 3 "arith_reg_operand"))] + "TARGET_SH2A && TARGET_BITOPS + && satisfies_constraint_Pso (operands[2]) + && REGNO (operands[0]) == REGNO (operands[3])" + [(set (match_dup 1) + (ior:QI (match_dup 1) (match_dup 2)))] + "") + +;; This matches cases where the bit in a memory location is cleared. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand") + (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand"))) + (set (match_dup 0) + (and:SI (match_dup 0) + (match_operand:SI 2 "const_int_operand"))) + (set (match_dup 1) + (match_operand 3 "arith_reg_operand"))] + "TARGET_SH2A && TARGET_BITOPS + && satisfies_constraint_Psz (operands[2]) + && REGNO (operands[0]) == REGNO (operands[3])" + [(set (match_dup 1) + (and:QI (match_dup 1) (match_dup 2)))] + "") + +;; This matches cases where a stack pointer increment at the start of the +;; epilogue combines with a stack slot read loading the return value. +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "") + (mem:SI (match_operand:SI 1 "arith_reg_operand" ""))) + (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))] + "TARGET_SH1 && REGNO (operands[1]) != REGNO (operands[0])" + "mov.l @%1+,%0") + +;; See the comment on the dt combiner pattern above. +(define_peephole + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (plus:SI (match_dup 0) + (const_int -1))) + (set (reg:SI T_REG) + (eq:SI (match_dup 0) (const_int 0)))] + "TARGET_SH2" + "dt %0") + +;; The following peepholes fold load sequences for which reload was not +;; able to generate a displacement addressing move insn. +;; This can happen when reload has to transform a move insn +;; without displacement into one with displacement. Or when reload can't +;; fit a displacement into the insn's constraints. In the latter case, the +;; load destination reg remains at r0, which reload compensates by inserting +;; another mov insn. 
+ +;; Fold sequence: +;; mov #54,r0 +;; mov.{b,w} @(r0,r15),r0 +;; mov r0,r3 +;; into: +;; mov.{b,w} @(54,r15),r3 +;; +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_operand:SI 2 "arith_reg_dest" "") + (sign_extend:SI + (mem:QI (plus:SI (match_dup 0) + (match_operand:SI 3 "arith_reg_operand" ""))))) + (set (match_operand:QI 4 "arith_reg_dest" "") + (match_operand:QI 5 "arith_reg_operand" ""))] + "TARGET_SH2A + && sh_legitimate_index_p (QImode, operands[1], true, true) + && REGNO (operands[2]) == REGNO (operands[5]) + && peep2_reg_dead_p (3, operands[5])" + [(set (match_dup 4) (mem:QI (plus:SI (match_dup 3) (match_dup 1))))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_operand:SI 2 "arith_reg_dest" "") + (sign_extend:SI + (mem:HI (plus:SI (match_dup 0) + (match_operand:SI 3 "arith_reg_operand" ""))))) + (set (match_operand:HI 4 "arith_reg_dest" "") + (match_operand:HI 5 "arith_reg_operand" ""))] + "TARGET_SH2A + && sh_legitimate_index_p (HImode, operands[1], true, true) + && REGNO (operands[2]) == REGNO (operands[5]) + && peep2_reg_dead_p (3, operands[5])" + [(set (match_dup 4) (mem:HI (plus:SI (match_dup 3) (match_dup 1))))] + "") + +;; Fold sequence: +;; mov #54,r0 +;; mov.{b,w} @(r0,r15),r1 +;; into: +;; mov.{b,w} @(54,r15),r1 +;; +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_operand:SI 2 "arith_reg_dest" "") + (sign_extend:SI + (mem:QI (plus:SI (match_dup 0) + (match_operand:SI 3 "arith_reg_operand" "")))))] + "TARGET_SH2A + && sh_legitimate_index_p (QImode, operands[1], true, true) + && (peep2_reg_dead_p (2, operands[0]) + || REGNO (operands[0]) == REGNO (operands[2]))" + [(set (match_dup 2) + (sign_extend:SI (mem:QI (plus:SI (match_dup 3) (match_dup 1)))))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (match_operand:SI 1 "const_int_operand" "")) + (set (match_operand:SI 2 "arith_reg_dest" "") + (sign_extend:SI + (mem:HI (plus:SI (match_dup 0) + (match_operand:SI 3 "arith_reg_operand" "")))))] + "TARGET_SH2A + && sh_legitimate_index_p (HImode, operands[1], true, true) + && (peep2_reg_dead_p (2, operands[0]) + || REGNO (operands[0]) == REGNO (operands[2]))" + [(set (match_dup 2) + (sign_extend:SI (mem:HI (plus:SI (match_dup 3) (match_dup 1)))))] + "") + +;; Fold sequence: +;; mov.{b,w} @(r0,r15),r0 +;; mov r0,r3 +;; into: +;; mov.{b,w} @(r0,r15),r3 +;; +;; This can happen when initially a displacement address is picked, where +;; the destination reg is fixed to r0, and then the address is transformed +;; into 'r0 + reg'. 
+(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (sign_extend:SI + (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))))) + (set (match_operand:QI 3 "arith_reg_dest" "") + (match_operand:QI 4 "arith_reg_operand" ""))] + "TARGET_SH1 + && REGNO (operands[0]) == REGNO (operands[4]) + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 3) + (mem:QI (plus:SI (match_dup 1) (match_dup 2))))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "arith_reg_dest" "") + (sign_extend:SI + (mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "") + (match_operand:SI 2 "arith_reg_operand" ""))))) + (set (match_operand:HI 3 "arith_reg_dest" "") + (match_operand:HI 4 "arith_reg_operand" ""))] + "TARGET_SH1 + && REGNO (operands[0]) == REGNO (operands[4]) + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 3) + (mem:HI (plus:SI (match_dup 1) (match_dup 2))))] + "") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "TARGET_SH1 && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") + + (mem:SF (match_dup 0)))] + "TARGET_SH1 && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) && REGNO (operands[2]) < 16) + || (GET_CODE (operands[2]) == SUBREG + && REGNO (SUBREG_REG (operands[2])) < 16)) + && reg_unused_after (operands[0], insn)" + "mov.l @(%0,%1),%2") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 2 "general_movsrc_operand" ""))] + "TARGET_SH2E && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) + && FP_OR_XD_REGISTER_P (REGNO (operands[2]))) + || (GET_CODE (operands[2]) == SUBREG + && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2]))))) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} %2,@(%0,%1)") + +(define_peephole + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r"))) + (set (match_operand:SF 2 "general_movdst_operand" "") + + (mem:SF (match_dup 0)))] + "TARGET_SH2E && REGNO (operands[0]) == 0 + && ((REG_P (operands[2]) + && FP_OR_XD_REGISTER_P (REGNO (operands[2]))) + || (GET_CODE (operands[2]) == SUBREG + && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2]))))) + && reg_unused_after (operands[0], insn)" + "fmov{.s|} @(%0,%1),%2") + +;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF). +(define_insn "sp_switch_1" + [(set (reg:SI SP_REG) (unspec_volatile [(match_operand:SI 0 "" "")] + UNSPECV_SP_SWITCH_B))] + "TARGET_SH1" +{ + return "mov.l r0,@-r15" "\n" + " mov.l %0,r0" "\n" + " mov.l @r0,r0" "\n" + " mov.l r15,@-r0" "\n" + " mov r0,r15"; +} + [(set_attr "length" "10")]) + +;; Switch back to the original stack for interrupt functions with the +;; sp_switch attribute. 
+(define_insn "sp_switch_2" + [(unspec_volatile [(const_int 0)] + UNSPECV_SP_SWITCH_E)] + "TARGET_SH1" +{ + return "mov.l @r15,r15" "\n" + " mov.l @r15+,r0"; +} + [(set_attr "length" "4")]) + +;; ------------------------------------------------------------------------- +;; Integer vector moves +;; ------------------------------------------------------------------------- + +(define_expand "movv8qi" + [(set (match_operand:V8QI 0 "general_movdst_operand" "") + (match_operand:V8QI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" +{ + prepare_move_operands (operands, V8QImode); +}) + +(define_insn "movv8qi_i" + [(set (match_operand:V8QI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V8QI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V8QImode) + || sh_register_operand (operands[1], V8QImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4")]) + +(define_split + [(set (match_operand:V8QI 0 "arith_reg_dest" "") + (subreg:V8QI (const_int 0) 0))] + "TARGET_SHMEDIA" + [(set (match_dup 0) + (const_vector:V8QI [(const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0) (const_int 0) + (const_int 0) (const_int 0)]))]) + +(define_split + [(set (match_operand 0 "arith_reg_dest" "") + (match_operand 1 "sh_rep_vec" ""))] + "TARGET_SHMEDIA && reload_completed + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && sh_vector_mode_supported_p (GET_MODE (operands[0])) + && GET_MODE_SIZE (GET_MODE (operands[0])) == 8 + && (XVECEXP (operands[1], 0, 0) != const0_rtx + || XVECEXP (operands[1], 0, 1) != const0_rtx) + && (XVECEXP (operands[1], 0, 0) != constm1_rtx + || XVECEXP (operands[1], 0, 1) != constm1_rtx)" + [(set (match_dup 0) (match_dup 1)) + (match_dup 2)] +{ + int unit_size = GET_MODE_UNIT_SIZE (GET_MODE (operands[1])); + rtx elt1 = XVECEXP (operands[1], 0, 1); + + if (unit_size > 2) + operands[2] = gen_mshflo_l (operands[0], operands[0], operands[0]); + else + { + if (unit_size < 2) + operands[0] = gen_rtx_REG (V4HImode, true_regnum (operands[0])); + operands[2] = gen_mperm_w0 (operands[0], operands[0]); + } + operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0])); + operands[1] = XVECEXP (operands[1], 0, 0); + if (unit_size < 2) + { + if (CONST_INT_P (operands[1]) && CONST_INT_P (elt1)) + operands[1] + = GEN_INT (TARGET_LITTLE_ENDIAN + ? 
(INTVAL (operands[1]) & 0xff) + (INTVAL (elt1) << 8) + : (INTVAL (operands[1]) << 8) + (INTVAL (elt1) & 0xff)); + else + { + operands[0] = gen_rtx_REG (V2QImode, true_regnum (operands[0])); + operands[1] + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, operands[1], elt1)); + } + } +}) + +(define_split + [(set (match_operand 0 "arith_reg_dest" "") + (match_operand 1 "sh_const_vec" ""))] + "TARGET_SHMEDIA && reload_completed + && GET_MODE (operands[0]) == GET_MODE (operands[1]) + && sh_vector_mode_supported_p (GET_MODE (operands[0]))" + [(set (match_dup 0) (match_dup 1))] +{ + rtx v = operands[1]; + enum machine_mode new_mode + = mode_for_size (GET_MODE_BITSIZE (GET_MODE (v)), MODE_INT, 0); + + operands[0] = gen_rtx_REG (new_mode, true_regnum (operands[0])); + operands[1] + = simplify_subreg (new_mode, operands[1], GET_MODE (operands[1]), 0); +}) + +(define_expand "movv2hi" + [(set (match_operand:V2HI 0 "general_movdst_operand" "") + (match_operand:V2HI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" +{ + prepare_move_operands (operands, V2HImode); +}) + +(define_insn "movv2hi_i" + [(set (match_operand:V2HI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V2HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V2HImode) + || sh_register_operand (operands[1], V2HImode))" + "@ + add.l %1, r63, %0 + movi %1, %0 + # + ld%M1.l %m1, %0 + st%M0.l %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4") + (set (attr "highpart") + (cond [(match_test "sh_contains_memref_p (insn)") + (const_string "user")] + (const_string "ignore")))]) + +(define_expand "movv4hi" + [(set (match_operand:V4HI 0 "general_movdst_operand" "") + (match_operand:V4HI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" +{ + prepare_move_operands (operands, V4HImode); +}) + +(define_insn "movv4hi_i" + [(set (match_operand:V4HI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V4HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V4HImode) + || sh_register_operand (operands[1], V4HImode))" + "@ + add %1, r63, %0 + movi %1, %0 + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4") + (set_attr "highpart" "depend")]) + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_movdst_operand" "") + (match_operand:V2SI 1 "general_movsrc_operand" ""))] + "TARGET_SHMEDIA" +{ + prepare_move_operands (operands, V2SImode); +}) + +(define_insn "movv2si_i" + [(set (match_operand:V2SI 0 "general_movdst_operand" "=r,r,r,rl,m") + (match_operand:V2SI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))] + "TARGET_SHMEDIA + && (register_operand (operands[0], V2SImode) + || sh_register_operand (operands[1], V2SImode))" + "@ + add %1, r63, %0 + # + # + ld%M1.q %m1, %0 + st%M0.q %m0, %N1" + [(set_attr "type" "arith_media,arith_media,*,load_media,store_media") + (set_attr "length" "4,4,16,4,4") + (set_attr "highpart" "depend")]) + +;; ------------------------------------------------------------------------- +;; Multimedia Intrinsics +;; ------------------------------------------------------------------------- + +(define_insn "absv2si2" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (abs:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mabs.l %1, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + 
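+;; Most operations in this section are also reachable from plain C through
+;; GCC's generic vector extension (a sketch, assuming an SHMEDIA
+;; configuration such as -m5-32media):
+;;
+;;     typedef int v2si __attribute__ ((vector_size (8)));
+;;
+;;     v2si vadd (v2si a, v2si b)
+;;     {
+;;       return a + b;	/* expands through addv2si3 below -> madd.l */
+;;     }
+;;
+;; in addition to the target-specific __builtin_sh_media_* builtins that
+;; map onto these patterns directly.
+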
+(define_insn "absv4hi2" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (abs:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mabs.w %1, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "addv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madd.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "addv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madd.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn_and_split "addv2hi3" + [(set (match_operand:V2HI 0 "arith_reg_dest" "=r") + (plus:V2HI (match_operand:V2HI 1 "extend_reg_operand" "%r") + (match_operand:V2HI 2 "extend_reg_operand" "r")))] + "TARGET_SHMEDIA" + "#" + "TARGET_SHMEDIA" + [(const_int 0)] +{ + rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0); + rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0); + rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0); + rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0); + rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0); + + emit_insn (gen_addv4hi3 (v4hi_dst, src0, src1)); + emit_insn (gen_truncdisi2 (si_dst, di_dst)); + DONE; +} + [(set_attr "highpart" "must_split")]) + +(define_insn "ssaddv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madds.l %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "usaddv8qi3" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (us_plus:V8QI (match_operand:V8QI 1 "arith_reg_operand" "%r") + (match_operand:V8QI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madds.ub %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "ssaddv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "madds.w %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpeqv8qi" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (neg:V8QI (eq:V8QI + (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpeq.b %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpeqv2si" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (neg:V2SI (eq:V2SI + (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpeq.l %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpeqv4hi" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (neg:V4HI (eq:V4HI + (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpeq.w %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" 
"depend")]) + +(define_insn "negcmpgtuv8qi" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (neg:V8QI (gtu:V8QI + (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpgt.ub %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpgtv2si" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (neg:V2SI (gt:V2SI + (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpgt.l %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "negcmpgtv4hi" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (neg:V4HI (gt:V4HI + (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcmpgt.w %N1, %N2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "mcmv" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_operand" "r")) + (and:DI (match_operand:DI 3 "arith_reg_operand" "0") + (not:DI (match_dup 2)))))] + "TARGET_SHMEDIA" + "mcmv %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "mcnvs_lw" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_concat:V4HI + (ss_truncate:V2HI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")) + (ss_truncate:V2HI + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcnvs.lw %N1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_insn "mcnvs_wb" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_concat:V8QI + (ss_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")) + (ss_truncate:V4QI + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcnvs.wb %N1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_insn "mcnvs_wub" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_concat:V8QI + (us_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")) + (us_truncate:V4QI + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mcnvs.wub %N1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_insn "mextr_rl" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 3 "mextr_bit_offset" "i")) + (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 4 "mextr_bit_offset" "i"))))] + "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64" +{ + static char templ[21]; + sprintf (templ, "mextr%d %%N1, %%N2, %%0", + (int) INTVAL (operands[3]) >> 3); + return templ; +} + [(set_attr "type" "arith_media")]) + +(define_insn "*mextr_lr" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 3 "mextr_bit_offset" "i")) + (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (match_operand:HI 4 "mextr_bit_offset" "i"))))] + "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64" +{ + static char templ[21]; + sprintf (templ, "mextr%d %%N2, %%N1, %%0", + (int) INTVAL (operands[4]) >> 3); + return templ; +} + [(set_attr "type" "arith_media")]) + +; mextrN can be modelled with vec_select / 
vec_concat, but the selection +; vector then varies depending on endianness. +(define_expand "mextr1" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (1 * 8), GEN_INT (7 * 8))); + DONE; +}) + +(define_expand "mextr2" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (2 * 8), GEN_INT (6 * 8))); + DONE; +}) + +(define_expand "mextr3" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (3 * 8), GEN_INT (5 * 8))); + DONE; +}) + +(define_expand "mextr4" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (4 * 8), GEN_INT (4 * 8))); + DONE; +}) + +(define_expand "mextr5" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (5 * 8), GEN_INT (3 * 8))); + DONE; +}) + +(define_expand "mextr6" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (6 * 8), GEN_INT (2 * 8))); + DONE; +}) + +(define_expand "mextr7" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2], + GEN_INT (7 * 8), GEN_INT (1 * 8))); + DONE; +}) + +(define_expand "mmacfx_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2HI 1 "extend_reg_operand" "") + (match_operand:V2HI 2 "extend_reg_operand" "") + (match_operand:V2SI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mmacfx_wl_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}) + +;; This could be highpart ignore if it only had inputs 2 or 3, but input 1 +;; is depend +(define_insn "mmacfx_wl_i" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_plus:V2SI + (match_operand:V2SI 1 "arith_reg_operand" "0") + (ss_truncate:V2SI + (ashift:V2DI + (sign_extend:V2DI + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r")) + (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r")))) + (const_int 1)))))] + "TARGET_SHMEDIA" + "mmacfx.wl %2, %3, %0" + [(set_attr "type" "mac_media") + (set_attr "highpart" "depend")]) + +(define_expand "mmacnfx_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2HI 1 "extend_reg_operand" "") + (match_operand:V2HI 2 "extend_reg_operand" "") + (match_operand:V2SI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mmacnfx_wl_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}) 
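+
+;; Operationally (a sketch derived from the mmacfx_wl_i / mmacnfx_wl_i RTL
+;; rather than from the SH-5 documentation), these are per-lane saturating
+;; fractional multiply-accumulates; one 32-bit lane behaves roughly like:
+;;
+;;     #include <stdint.h>
+;;
+;;     static int32_t sat32 (int64_t v)
+;;     {
+;;       if (v > INT32_MAX) return INT32_MAX;
+;;       if (v < INT32_MIN) return INT32_MIN;
+;;       return (int32_t) v;
+;;     }
+;;
+;;     /* mmacfx.wl lane; mmacnfx.wl subtracts the product instead.  */
+;;     static int32_t mmacfx_lane (int32_t acc, int16_t a, int16_t b)
+;;     {
+;;       return sat32 ((int64_t) acc + sat32 (((int64_t) a * b) << 1));
+;;     }
+;;
+;; The shift left by one matches the usual Q15 x Q15 -> Q31 fractional
+;; multiply convention.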
+ +(define_insn "mmacnfx_wl_i" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_minus:V2SI + (match_operand:V2SI 1 "arith_reg_operand" "0") + (ss_truncate:V2SI + (ashift:V2DI + (sign_extend:V2DI + (mult:V2SI + (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r")) + (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r")))) + (const_int 1)))))] + "TARGET_SHMEDIA" + "mmacnfx.wl %2, %3, %0" + [(set_attr "type" "mac_media") + (set_attr "highpart" "depend")]) + +(define_insn "mulv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (mult:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mmul.l %1, %2, %0" + [(set_attr "type" "d2mpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mulv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (mult:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mmul.w %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mmulfx_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_truncate:V2SI + (ashiftrt:V2DI + (mult:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r")) + (sign_extend:V2DI (match_operand:V2SI 2 "arith_reg_operand" "r"))) + (const_int 31))))] + "TARGET_SHMEDIA" + "mmulfx.l %1, %2, %0" + [(set_attr "type" "d2mpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mmulfx_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_truncate:V4HI + (ashiftrt:V4SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (const_int 15))))] + "TARGET_SHMEDIA" + "mmulfx.w %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "depend")]) + +(define_insn "mmulfxrp_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_truncate:V4HI + (ashiftrt:V4SI + (plus:V4SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (const_int 16384)) + (const_int 15))))] + "TARGET_SHMEDIA" + "mmulfxrp.w %1, %2, %0" + [(set_attr "type" "dmpy_media") + (set_attr "highpart" "depend")]) + + +(define_expand "mmulhi_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_operand" "") + (match_operand:V4HI 2 "arith_reg_operand" "")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul23_wl : gen_mmul01_wl) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "mmullo_wl" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_operand" "") + (match_operand:V4HI 2 "arith_reg_operand" "")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul01_wl : gen_mmul23_wl) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "mmul23_wl" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? 
"mmulhi.wl %1, %2, %0" + : "mmullo.wl %1, %2, %0"); +} + [(set_attr "type" "dmpy_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +(define_insn "mmul01_wl" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (mult:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r"))) + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? "mmullo.wl %1, %2, %0" + : "mmulhi.wl %1, %2, %0"); +} + [(set_attr "type" "dmpy_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + + +(define_expand "mmulsum_wq" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_operand" "") + (match_operand:V4HI 2 "arith_reg_operand" "") + (match_operand:DI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_mmulsum_wq_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}) + +(define_insn "mmulsum_wq_i" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (plus:DI (match_operand:DI 1 "arith_reg_operand" "0") + (plus:DI + (plus:DI + (vec_select:DI + (mult:V4DI + (sign_extend:V4DI (match_operand:V4HI 2 "arith_reg_operand" "r")) + (sign_extend:V4DI (match_operand:V4HI 3 "arith_reg_operand" "r"))) + (parallel [(const_int 0)])) + (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2)) + (sign_extend:V4DI (match_dup 3))) + (parallel [(const_int 1)]))) + (plus:DI + (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2)) + (sign_extend:V4DI (match_dup 3))) + (parallel [(const_int 2)])) + (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2)) + (sign_extend:V4DI (match_dup 3))) + (parallel [(const_int 3)]))))))] + "TARGET_SHMEDIA" + "mmulsum.wq %2, %3, %0" + [(set_attr "type" "mac_media")]) + +(define_expand "mperm_w" + [(match_operand:V4HI 0 "arith_reg_dest" "=r") + (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:QI 2 "extend_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mperm_w_little : gen_mperm_w_big) + (operands[0], operands[1], operands[2])); + DONE; +}) + +; This use of vec_select isn't exactly correct according to rtl.texi +; (because not constant), but it seems a straightforward extension. 
+(define_insn "mperm_w_little" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (match_operand:V4HI 1 "arith_reg_operand" "r") + (parallel + [(zero_extract:QI (match_operand:QI 2 "extend_reg_or_0_operand" "rZ") + (const_int 2) (const_int 0)) + (zero_extract:QI (match_dup 2) (const_int 2) (const_int 2)) + (zero_extract:QI (match_dup 2) (const_int 2) (const_int 4)) + (zero_extract:QI (match_dup 2) (const_int 2) (const_int 6))])))] + "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN" + "mperm.w %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "mperm_w_big" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (match_operand:V4HI 1 "arith_reg_operand" "r") + (parallel + [(zero_extract:QI (not:QI (match_operand:QI 2 + "extend_reg_or_0_operand" "rZ")) + (const_int 2) (const_int 0)) + (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 2)) + (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 4)) + (zero_extract:QI (not:QI (match_dup 2)) + (const_int 2) (const_int 6))])))] + "TARGET_SHMEDIA && TARGET_BIG_ENDIAN" + "mperm.w %1, %N2, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "mperm_w0" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_duplicate:V4HI (truncate:HI (match_operand 1 + "trunc_hi_operand" "r"))))] + "TARGET_SHMEDIA" + "mperm.w %1, r63, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_expand "msad_ubq" + [(match_operand:DI 0 "arith_reg_dest" "") + (match_operand:V8QI 1 "arith_reg_or_0_operand" "") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "") + (match_operand:DI 3 "arith_reg_operand" "")] + "TARGET_SHMEDIA" +{ + emit_insn (gen_msad_ubq_i (operands[0], operands[3], + operands[1], operands[2])); + DONE; +}) + +(define_insn "msad_ubq_i" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (plus:DI + (plus:DI + (plus:DI + (plus:DI + (match_operand:DI 1 "arith_reg_operand" "0") + (abs:DI (vec_select:DI + (minus:V8DI + (zero_extend:V8DI + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")) + (zero_extend:V8DI + (match_operand:V8QI 3 "arith_reg_or_0_operand" "rZ"))) + (parallel [(const_int 0)])))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 1)])))) + (plus:DI + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 2)]))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 3)]))))) + (plus:DI + (plus:DI + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 4)]))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 5)])))) + (plus:DI + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 6)]))) + (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2)) + (zero_extend:V8DI (match_dup 3))) + (parallel [(const_int 7)])))))))] + "TARGET_SHMEDIA" + "msad.ubq %N2, %N3, %0" + [(set_attr "type" "mac_media")]) + +(define_insn "mshalds_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_truncate:V2SI + (ashift:V2DI + (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r")) + (and:DI (match_operand:DI 2 "arith_reg_operand" "r") + (const_int 31)))))] + "TARGET_SHMEDIA" + 
"mshalds.l %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "mshalds_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_truncate:V4HI + (ashift:V4SI + (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r")) + (and:DI (match_operand:DI 2 "arith_reg_operand" "r") + (const_int 15)))))] + "TARGET_SHMEDIA" + "mshalds.w %1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "ashrv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ashiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshard.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "ashrv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ashiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshard.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "mshards_q" + [(set (match_operand:HI 0 "arith_reg_dest" "=r") + (ss_truncate:HI + (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "arith_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mshards.q %1, %N2, %0" + [(set_attr "type" "mcmp_media")]) + +(define_expand "mshfhi_b" + [(match_operand:V8QI 0 "arith_reg_dest" "") + (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_b : gen_mshf0_b) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "mshflo_b" + [(match_operand:V8QI 0 "arith_reg_dest" "") + (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_b : gen_mshf4_b) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "mshf4_b" + [(set + (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_select:V8QI + (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) + (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? "mshfhi.b %N1, %N2, %0" + : "mshflo.b %N1, %N2, %0"); +} + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +(define_insn "mshf0_b" + [(set + (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_select:V8QI + (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) + (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? "mshflo.b %N1, %N2, %0" + : "mshfhi.b %N1, %N2, %0"); +} + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + +(define_expand "mshfhi_l" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? 
gen_mshf4_l : gen_mshf0_l) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "mshflo_l" + [(match_operand:V2SI 0 "arith_reg_dest" "") + (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_l : gen_mshf4_l) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "mshf4_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 1) (const_int 3)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? "mshfhi.l %N1, %N2, %0" + : "mshflo.l %N1, %N2, %0"); +} + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +(define_insn "mshf0_l" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (vec_select:V2SI + (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 0) (const_int 2)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? "mshflo.l %N1, %N2, %0" + : "mshfhi.l %N1, %N2, %0"); +} + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + +(define_expand "mshfhi_w" + [(match_operand:V4HI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_w : gen_mshf0_w) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "mshflo_w" + [(match_operand:V4HI 0 "arith_reg_dest" "") + (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")] + "TARGET_SHMEDIA" +{ + emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_w : gen_mshf4_w) + (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_insn "mshf4_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? "mshfhi.w %N1, %N2, %0" + : "mshflo.w %N1, %N2, %0"); +} + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "big") (const_string "ignore")] + (const_string "user")))]) + +(define_insn "mshf0_w" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")) + (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] + "TARGET_SHMEDIA" +{ + return (TARGET_LITTLE_ENDIAN + ? 
"mshflo.w %N1, %N2, %0" + : "mshfhi.w %N1, %N2, %0"); +} + [(set_attr "type" "arith_media") + (set (attr "highpart") + (cond [(eq_attr "endian" "little") (const_string "ignore")] + (const_string "user")))]) + +(define_insn "mshflo_w_x" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (vec_select:V4HI + (vec_concat:V4HI (match_operand:V2HI 1 "extend_reg_or_0_operand" "rZ") + (match_operand:V2HI 2 "extend_reg_or_0_operand" "rZ")) + (parallel [(const_int 2) (const_int 0) (const_int 3) (const_int 1)])))] + "TARGET_SHMEDIA" + "mshflo.w %N1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; These are useful to expand ANDs and as combiner patterns. +(define_insn_and_split "mshfhi_l_di" + [(set (match_operand:DI 0 "arith_reg_dest" "=r,f") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ,f") + (const_int 32)) + (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ,?f") + (const_int -4294967296))))] + "TARGET_SHMEDIA" + "@ + mshfhi.l %N1, %N2, %0 + #" + "TARGET_SHMEDIA && reload_completed + && ! GENERAL_REGISTER_P (true_regnum (operands[0]))" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 5) (match_dup 6))] +{ + operands[3] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[0]); + operands[6] = gen_highpart (SImode, operands[2]); +} + [(set_attr "type" "arith_media")]) + +(define_insn "*mshfhi_l_di_rev" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int -4294967296)) + (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 32))))] + "TARGET_SHMEDIA" + "mshfhi.l %N2, %N1, %0" + [(set_attr "type" "arith_media")]) + +(define_split + [(set (match_operand:DI 0 "arith_reg_dest" "") + (ior:DI (zero_extend:DI (match_operand:SI 1 + "extend_reg_or_0_operand" "")) + (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "") + (const_int -4294967296)))) + (clobber (match_operand:DI 3 "arith_reg_dest" ""))] + "TARGET_SHMEDIA" + [(const_int 0)] +{ + emit_insn (gen_ashldi3_media (operands[3], + simplify_gen_subreg (DImode, operands[1], + SImode, 0), + GEN_INT (32))); + emit_insn (gen_mshfhi_l_di (operands[0], operands[3], operands[2])); + DONE; +}) + +(define_insn "mshflo_l_di" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int 4294967295)) + (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 32))))] + + "TARGET_SHMEDIA" + "mshflo.l %N1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "*mshflo_l_di_rev" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int 32)) + (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 4294967295))))] + + "TARGET_SHMEDIA" + "mshflo.l %N2, %N1, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +;; Combiner pattern for trampoline initialization. +(define_insn_and_split "*double_shori" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") + (const_int 32)) + (match_operand:DI 2 "const_int_operand" "n")))] + "TARGET_SHMEDIA + && ! 
(INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0xffffffffUL)" + "#" + "rtx_equal_p (operands[0], operands[1])" + [(const_int 0)] +{ + HOST_WIDE_INT v = INTVAL (operands[2]); + + emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v >> 16))); + emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v & 65535))); + DONE; +} + [(set_attr "highpart" "ignore")]) + +(define_insn "*mshflo_l_di_x" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand" + "rZ")) + (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ") + (const_int 32))))] + "TARGET_SHMEDIA" + "mshflo.l %N1, %N2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn_and_split "concat_v2sf" + [(set (match_operand:V2SF 0 "register_operand" "=r,f,f?") +;; (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,0,f") + (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,f,f") + (match_operand:SF 2 "register_operand" "rZ,f,f")))] + "TARGET_SHMEDIA" + "@ + mshflo.l %N1, %N2, %0 + # + #" + "TARGET_SHMEDIA && reload_completed + && ! GENERAL_REGISTER_P (true_regnum (operands[0]))" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 4) (match_dup 2))] +{ + operands[3] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 0); + operands[4] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 4); +} + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "*mshflo_l_di_x_rev" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ") + (const_int 32)) + (zero_extend:DI + (match_operand:SI 2 "extend_reg_or_0_operand" "rZ"))))] + "TARGET_SHMEDIA" + "mshflo.l %N2, %N1, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "ignore")]) + +(define_insn "ashlv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ashift:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlld.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_split + [(set (match_operand 0 "any_register_operand" "") + (match_operator 3 "shift_operator" + [(match_operand 1 "any_register_operand" "") + (match_operand 2 "shift_count_reg_operand" "")]))] + "TARGET_SHMEDIA && ! 
register_operand (operands[2], VOIDmode)" + [(set (match_dup 0) (match_dup 3))] +{ + rtx count = operands[2]; + enum machine_mode outer_mode = GET_MODE (operands[2]), inner_mode; + + while (GET_CODE (count) == ZERO_EXTEND || GET_CODE (count) == SIGN_EXTEND + || (GET_CODE (count) == SUBREG && SUBREG_BYTE (count) == 0) + || GET_CODE (count) == TRUNCATE) + count = XEXP (count, 0); + inner_mode = GET_MODE (count); + count = simplify_gen_subreg (outer_mode, count, inner_mode, + subreg_lowpart_offset (outer_mode, inner_mode)); + operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]), + operands[1], count); +}) + +(define_insn "ashlv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ashift:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlld.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "lshrv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (lshiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlrd.l %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "lshrv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (lshiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r") + (match_operand:DI 2 "shift_count_reg_operand" "r")))] + "TARGET_SHMEDIA" + "mshlrd.w %1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "subv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msub.l %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn "subv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msub.w %N1, %2, %0" + [(set_attr "type" "arith_media") + (set_attr "highpart" "depend")]) + +(define_insn_and_split "subv2hi3" + [(set (match_operand:V2HI 0 "arith_reg_dest" "=r") + (minus:V2HI (match_operand:V2HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "#" + "TARGET_SHMEDIA" + [(const_int 0)] +{ + rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0); + rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0); + rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0); + rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0); + rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0); + + emit_insn (gen_subv4hi3 (v4hi_dst, src0, src1)); + emit_insn (gen_truncdisi2 (si_dst, di_dst)); + DONE; +} + [(set_attr "highpart" "must_split")]) + +(define_insn "sssubv2si3" + [(set (match_operand:V2SI 0 "arith_reg_dest" "=r") + (ss_minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V2SI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msubs.l %N1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "ussubv8qi3" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (us_minus:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V8QI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msubs.ub %N1, %2, %0" + 
[(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +(define_insn "sssubv4hi3" + [(set (match_operand:V4HI 0 "arith_reg_dest" "=r") + (ss_minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ") + (match_operand:V4HI 2 "arith_reg_operand" "r")))] + "TARGET_SHMEDIA" + "msubs.w %N1, %2, %0" + [(set_attr "type" "mcmp_media") + (set_attr "highpart" "depend")]) + +;; ------------------------------------------------------------------------- +;; Floating Point Intrinsics +;; ------------------------------------------------------------------------- + +(define_insn "fcosa_s" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")] + UNSPEC_FCOSA))] + "TARGET_SHMEDIA" + "fcosa.s %1, %0" + [(set_attr "type" "atrans_media")]) + +(define_insn "fsina_s" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")] + UNSPEC_FSINA))] + "TARGET_SHMEDIA" + "fsina.s %1, %0" + [(set_attr "type" "atrans_media")]) + +(define_insn "fipr" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (plus:SF (plus:SF (vec_select:SF (mult:V4SF (match_operand:V4SF 1 + "fp_arith_reg_operand" "f") + (match_operand:V4SF 2 + "fp_arith_reg_operand" "f")) + (parallel [(const_int 0)])) + (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2)) + (parallel [(const_int 1)]))) + (plus:SF (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2)) + (parallel [(const_int 2)])) + (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2)) + (parallel [(const_int 3)])))))] + "TARGET_SHMEDIA" + "fipr.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_insn "fsrra_s" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "f")] + UNSPEC_FSRRA))] + "TARGET_SHMEDIA" + "fsrra.s %1, %0" + [(set_attr "type" "atrans_media")]) + +(define_insn "ftrv" + [(set (match_operand:V4SF 0 "fp_arith_reg_operand" "=f") + (plus:V4SF + (plus:V4SF + (mult:V4SF + (vec_select:V4SF (match_operand:V16SF 1 "fp_arith_reg_operand" "f") + (parallel [(const_int 0) (const_int 5) + (const_int 10) (const_int 15)])) + (match_operand:V4SF 2 "fp_arith_reg_operand" "f")) + (mult:V4SF + (vec_select:V4SF (match_dup 1) + (parallel [(const_int 4) (const_int 9) + (const_int 14) (const_int 3)])) + (vec_select:V4SF (match_dup 2) + (parallel [(const_int 1) (const_int 2) + (const_int 3) (const_int 0)])))) + (plus:V4SF + (mult:V4SF + (vec_select:V4SF (match_dup 1) + (parallel [(const_int 8) (const_int 13) + (const_int 2) (const_int 7)])) + (vec_select:V4SF (match_dup 2) + (parallel [(const_int 2) (const_int 3) + (const_int 0) (const_int 1)]))) + (mult:V4SF + (vec_select:V4SF (match_dup 1) + (parallel [(const_int 12) (const_int 1) + (const_int 6) (const_int 11)])) + (vec_select:V4SF (match_dup 2) + (parallel [(const_int 3) (const_int 0) + (const_int 1) (const_int 2)]))))))] + "TARGET_SHMEDIA" + "ftrv.s %1, %2, %0" + [(set_attr "type" "fparith_media")]) + +(define_insn "ldhi_l" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:SI (and:SI (match_dup 1) (const_int 3)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32" + "ldhi.l %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn "ldhi_q" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p") + 
(const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 1) (const_int 7)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32" + "ldhi.q %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn_and_split "*ldhi_q_comb0" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "ua_offset" "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 1) (const_int 7)) + (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0" + "#" + "" + [(pc)] +{ + emit_insn (gen_ldhi_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE; +}) + +(define_insn_and_split "*ldhi_q_comb1" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "ua_offset" "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (plus:SI (match_dup 1) + (match_operand:SI 3 "ua_offset" "I06")) + (const_int 7)) + (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8) + && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])" + "#" + "" + [(pc)] +{ + emit_insn (gen_ldhi_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE; +}) + +(define_insn "ldlo_l" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (and:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -4))) + (minus:SI (const_int 4) (and:SI (match_dup 1) (const_int 3))) + (and:SI (match_dup 1) (const_int 3))))] + "TARGET_SHMEDIA32" + "ldlo.l %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn "ldlo_q" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:SI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7))) + (and:SI (match_dup 1) (const_int 7))))] + "TARGET_SHMEDIA32" + "ldlo.q %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn_and_split "*ldlo_q_comb0" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7))) + (and:SI (match_dup 1) (const_int 7))))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0" + "#" + "" + [(pc)] +{ + emit_insn (gen_ldlo_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE; +}) + +(define_insn_and_split "*ldlo_q_comb1" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) + (and:SI (plus:SI (match_dup 1) + (match_operand:SI 3 "ua_offset" "I06")) + (const_int 7))) + (and:SI (plus:SI (match_dup 1) (match_dup 3)) (const_int 7))))] + "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8) + && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])" + "#" + "" + [(pc)] +{ + emit_insn (gen_ldlo_q (operands[0], + gen_rtx_PLUS (SImode, operands[1], operands[2]))); + DONE; +}) + +(define_insn "sthi_l" + [(set (zero_extract:SI + (mem:SI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:SI (and:SI (match_dup 0) (const_int 3)) (const_int 1)) + (const_int 0)) + (match_operand:SI 1 
"arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "sthi.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +;; All unaligned stores are considered to be 'narrow' because they typically +;; operate on less that a quadword, and when they operate on a full quadword, +;; the vanilla store high / store low sequence will cause a stall if not +;; scheduled apart. +(define_insn "sthi_q" + [(set (zero_extract:DI + (mem:DI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1)) + (const_int 0)) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "sthi.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn_and_split "*sthi_q_comb0" + [(set (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI + (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "ua_offset" "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1)) + (const_int 0)) + (match_operand:DI 2 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0" + "#" + "" + [(pc)] +{ + emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[2])); + DONE; +}) + +(define_insn_and_split "*sthi_q_comb1" + [(set (zero_extract:DI + (mem:DI (plus:SI (ior:SI (plus:SI + (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "ua_offset" "I06")) + (const_int 7)) + (const_int -7))) + (plus:SI (and:SI (plus:SI (match_dup 0) + (match_operand:SI 2 "ua_offset" "I06")) + (const_int 7)) + (const_int 1)) + (const_int 0)) + (match_operand:DI 3 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & -8) + && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])" + "#" + "" + [(pc)] +{ + emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[3])); + DONE; +}) + +;; This is highpart user because the address is used as full 64 bit. 
+(define_insn "stlo_l" + [(set (zero_extract:SI + (mem:SI (and:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -4))) + (minus:SI (const_int 4) (and:SI (match_dup 0) (const_int 3))) + (and:SI (match_dup 0) (const_int 3))) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "stlo.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn "stlo_q" + [(set (zero_extract:DI + (mem:DI (and:SI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7))) + (and:SI (match_dup 0) (const_int 7))) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32" + "stlo.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn_and_split "*stlo_q_comb0" + [(set (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7))) + (and:SI (match_dup 0) (const_int 7))) + (match_operand:DI 2 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0" + "#" + "" + [(pc)] +{ + emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[2])); + DONE; +}) + +(define_insn_and_split "*stlo_q_comb1" + [(set (zero_extract:DI + (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "ua_offset" "I06")) + (const_int -8))) + (minus:SI (const_int 8) + (and:SI (plus:SI (match_dup 0) + (match_operand:SI 2 "ua_offset" "I06")) + (const_int 7))) + (and:SI (plus:SI (match_dup 0) (match_dup 2)) (const_int 7))) + (match_operand:DI 3 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])" + "#" + "" + [(pc)] +{ + emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]), + operands[3])); + DONE; +}) + +(define_insn "ldhi_l64" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:DI (and:DI (match_dup 1) (const_int 3)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA64" + "ldhi.l %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn "ldhi_q64" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:DI (and:DI (match_dup 1) (const_int 7)) (const_int 1)) + (const_int 0)))] + "TARGET_SHMEDIA64" + "ldhi.q %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn "ldlo_l64" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extract:SI + (mem:SI (and:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -4))) + (minus:DI (const_int 4) (and:DI (match_dup 1) (const_int 3))) + (and:DI (match_dup 1) (const_int 3))))] + "TARGET_SHMEDIA64" + "ldlo.l %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn "ldlo_q64" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extract:DI + (mem:DI (and:DI (match_operand:QI 1 "ua_address_operand" "p") + (const_int -8))) + (minus:DI (const_int 8) (and:DI (match_dup 1) (const_int 7))) + (and:DI (match_dup 1) (const_int 7))))] + "TARGET_SHMEDIA64" + "ldlo.q %U1, %0" + [(set_attr "type" "load_media")]) + +(define_insn "sthi_l64" + [(set (zero_extract:SI + (mem:SI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 3)) + (const_int -3))) + (plus:DI (and:DI (match_dup 0) (const_int 3)) 
(const_int 1)) + (const_int 0)) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "sthi.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn "sthi_q64" + [(set (zero_extract:DI + (mem:DI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int 7)) + (const_int -7))) + (plus:DI (and:DI (match_dup 0) (const_int 7)) (const_int 1)) + (const_int 0)) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "sthi.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn "stlo_l64" + [(set (zero_extract:SI + (mem:SI (and:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -4))) + (minus:DI (const_int 4) (and:DI (match_dup 0) (const_int 3))) + (and:DI (match_dup 0) (const_int 3))) + (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "stlo.l %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn "stlo_q64" + [(set (zero_extract:DI + (mem:DI (and:DI (match_operand:QI 0 "ua_address_operand" "p") + (const_int -8))) + (minus:DI (const_int 8) (and:DI (match_dup 0) (const_int 7))) + (and:DI (match_dup 0) (const_int 7))) + (match_operand:DI 1 "arith_reg_operand" "r"))] + "TARGET_SHMEDIA64" + "stlo.q %U0, %1" + [(set_attr "type" "ustore_media")]) + +(define_insn "nsb" + [(set (match_operand:QI 0 "arith_reg_dest" "=r") + (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")] + UNSPEC_NSB))] + "TARGET_SHMEDIA" + "nsb %1, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "nsbsi" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (zero_extend:SI + (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")] + UNSPEC_NSB)))] + "TARGET_SHMEDIA" + "nsb %1, %0" + [(set_attr "type" "arith_media")]) + +(define_insn "nsbdi" + [(set (match_operand:DI 0 "arith_reg_dest" "=r") + (zero_extend:DI + (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")] + UNSPEC_NSB)))] + "TARGET_SHMEDIA" + "nsb %1, %0" + [(set_attr "type" "arith_media")]) + +(define_expand "ffsdi2" + [(set (match_operand:DI 0 "arith_reg_dest" "") + (ffs:DI (match_operand:DI 1 "arith_reg_operand" "")))] + "TARGET_SHMEDIA" +{ + rtx scratch = gen_reg_rtx (DImode); + rtx last; + + emit_insn (gen_adddi3 (scratch, operands[1], constm1_rtx)); + emit_insn (gen_xordi3 (scratch, operands[1], scratch)); + emit_insn (gen_lshrdi3_media (scratch, scratch, const1_rtx)); + emit_insn (gen_nsbdi (scratch, scratch)); + emit_insn (gen_adddi3 (scratch, scratch, GEN_INT (-64))); + emit_insn (gen_movdicc_false (scratch, operands[1], const0_rtx, scratch)); + last = emit_insn (gen_subdi3 (operands[0], const0_rtx, scratch)); + set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (DImode, operands[0])); + + DONE; +}) + +(define_expand "ffssi2" + [(set (match_operand:SI 0 "arith_reg_dest" "") + (ffs:SI (match_operand:SI 1 "arith_reg_operand" "")))] + "TARGET_SHMEDIA" +{ + rtx scratch = gen_reg_rtx (SImode); + rtx discratch = gen_reg_rtx (DImode); + rtx last; + + emit_insn (gen_adddi3 (discratch, + simplify_gen_subreg (DImode, operands[1], SImode, 0), + constm1_rtx)); + emit_insn (gen_andcdi3 (discratch, + simplify_gen_subreg (DImode, operands[1], SImode, 0), + discratch)); + emit_insn (gen_nsbsi (scratch, discratch)); + last = emit_insn (gen_subsi3 (operands[0], + force_reg (SImode, GEN_INT (63)), scratch)); + set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (SImode, operands[0])); + + DONE; +}) + +(define_insn "byterev" + [(set (match_operand:V8QI 0 "arith_reg_dest" "=r") + (vec_select:V8QI (match_operand:V8QI 1 "arith_reg_operand" "r") + (parallel [(const_int 7) 
(const_int 6) (const_int 5) + (const_int 4) (const_int 3) (const_int 2) + (const_int 1) (const_int 0)])))] + "TARGET_SHMEDIA" + "byterev %1, %0" + [(set_attr "type" "arith_media")]) + +;; In user mode, the "pref" instruction will raise a RADDERR exception +;; for accesses to [0x80000000,0xffffffff]. This makes it an unsuitable +;; implementation of __builtin_prefetch for VxWorks RTPs. +(define_expand "prefetch" + [(prefetch (match_operand 0 "address_operand" "") + (match_operand:SI 1 "const_int_operand" "") + (match_operand:SI 2 "const_int_operand" ""))] + "(TARGET_SH2A || TARGET_SH3 || TARGET_SH5) + && (TARGET_SHMEDIA || ! TARGET_VXWORKS_RTP)") + +(define_insn "*prefetch" + [(prefetch (match_operand:SI 0 "register_operand" "r") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "(TARGET_SH2A || TARGET_SH3 || TARGET_SHCOMPACT) && ! TARGET_VXWORKS_RTP" + "pref @%0" + [(set_attr "type" "other")]) + +(define_insn "*prefetch_media" + [(prefetch (match_operand:QI 0 "address_operand" "p") + (match_operand:SI 1 "const_int_operand" "n") + (match_operand:SI 2 "const_int_operand" "n"))] + "TARGET_SHMEDIA" +{ + operands[0] = gen_rtx_MEM (QImode, operands[0]); + output_asm_insn ("ld%M0.b %m0,r63", operands); + return ""; +} + [(set_attr "type" "other")]) + +(define_insn "alloco_i" + [(set (mem:BLK (match_operand:QI 0 "cache_address_operand" "p")) + (unspec:BLK [(const_int 0)] UNSPEC_ALLOCO))] + "TARGET_SHMEDIA32" +{ + rtx xops[2]; + + if (GET_CODE (operands[0]) == PLUS) + { + xops[0] = XEXP (operands[0], 0); + xops[1] = XEXP (operands[0], 1); + } + else + { + xops[0] = operands[0]; + xops[1] = const0_rtx; + } + output_asm_insn ("alloco %0, %1", xops); + return ""; +} + [(set_attr "type" "other")]) + +(define_split + [(set (match_operand 0 "any_register_operand" "") + (match_operand 1 "" ""))] + "TARGET_SHMEDIA && reload_completed" + [(set (match_dup 0) (match_dup 1))] +{ + int n_changes = 0; + + for_each_rtx (&operands[1], shmedia_cleanup_truncate, &n_changes); + if (!n_changes) + FAIL; +}) + +;; ------------------------------------------------------------------------- +;; Stack Protector Patterns +;; ------------------------------------------------------------------------- + +(define_expand "stack_protect_set" + [(set (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" ""))] + "" +{ + if (TARGET_SHMEDIA) + { + if (TARGET_SHMEDIA64) + emit_insn (gen_stack_protect_set_di_media (operands[0], operands[1])); + else + emit_insn (gen_stack_protect_set_si_media (operands[0], operands[1])); + } + else + emit_insn (gen_stack_protect_set_si (operands[0], operands[1])); + + DONE; +}) + +(define_insn "stack_protect_set_si" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "!TARGET_SHMEDIA" +{ + return "mov.l %1,%2" "\n" + " mov.l %2,%0" "\n" + " mov #0,%2"; +} + [(set_attr "type" "other") + (set_attr "length" "6")]) + +(define_insn "stack_protect_set_si_media" + [(set (match_operand:SI 0 "memory_operand" "=m") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:SI 2 "=&r") (const_int 0))] + "TARGET_SHMEDIA" +{ + return "ld%M1.l %m1,%2" "\n" + " st%M0.l %m0,%2" "\n" + " movi 0,%2"; +} + [(set_attr "type" "other") + (set_attr "length" "12")]) + +(define_insn "stack_protect_set_di_media" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 
"memory_operand" "m")] UNSPEC_SP_SET)) + (set (match_scratch:DI 2 "=&r") (const_int 0))] + "TARGET_SHMEDIA64" +{ + return "ld%M1.q %m1,%2" "\n" + " st%M0.q %m0,%2" "\n" + " movi 0,%2"; +} + [(set_attr "type" "other") + (set_attr "length" "12")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "") + (match_operand 2 "" "")] + "" +{ + if (TARGET_SHMEDIA) + { + rtx tmp = gen_reg_rtx (GET_MODE (operands[0])); + rtx test; + + test = gen_rtx_NE (VOIDmode, tmp, const0_rtx); + if (TARGET_SHMEDIA64) + { + emit_insn (gen_stack_protect_test_di_media (tmp, operands[0], + operands[1])); + emit_jump_insn (gen_cbranchdi4 (test, tmp, const0_rtx, operands[2])); + } + else + { + emit_insn (gen_stack_protect_test_si_media (tmp, operands[0], + operands[1])); + emit_jump_insn (gen_cbranchsi4 (test, tmp, const0_rtx, operands[2])); + } + } + else + { + emit_insn (gen_stack_protect_test_si (operands[0], operands[1])); + emit_jump_insn (gen_branch_true (operands[2])); + } + + DONE; +}) + +(define_insn "stack_protect_test_si" + [(set (reg:SI T_REG) + (unspec:SI [(match_operand:SI 0 "memory_operand" "m") + (match_operand:SI 1 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:SI 2 "=&r") (const_int 0)) + (set (match_scratch:SI 3 "=&r") (const_int 0))] + "!TARGET_SHMEDIA" +{ + return "mov.l %0,%2" "\n" + " mov.l %1,%3" "\n" + " cmp/eq %2,%3" "\n" + " mov #0,%2" "\n" + " mov #0,%3"; +} + [(set_attr "type" "other") + (set_attr "length" "10")]) + +(define_insn "stack_protect_test_si_media" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec:SI [(match_operand:SI 1 "memory_operand" "m") + (match_operand:SI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:SI 3 "=&r") (const_int 0))] + "TARGET_SHMEDIA" +{ + return "ld%M1.l %m1,%0" "\n" + " ld%M2.l %m2,%3" "\n" + " cmpeq %0,%3,%0" "\n" + " movi 0,%3"; +} + [(set_attr "type" "other") + (set_attr "length" "16")]) + +(define_insn "stack_protect_test_di_media" + [(set (match_operand:DI 0 "register_operand" "=&r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "m") + (match_operand:DI 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (set (match_scratch:DI 3 "=&r") (const_int 0))] + "TARGET_SHMEDIA64" +{ + return "ld%M1.q %m1,%0" "\n" + " ld%M2.q %m2,%3" "\n" + " cmpeq %0,%3,%0" "\n" + " movi 0,%3"; +} + [(set_attr "type" "other") + (set_attr "length" "16")]) + +;; ------------------------------------------------------------------------- +;; Atomic operations +;; ------------------------------------------------------------------------- + +(include "sync.md") |