Diffstat (limited to 'gcc-4.9/gcc/config/sh')
-rw-r--r--  gcc-4.9/gcc/config/sh/constraints.md | 324
-rw-r--r--  gcc-4.9/gcc/config/sh/divcost-analysis | 88
-rw-r--r--  gcc-4.9/gcc/config/sh/divtab-sh4-300.c | 77
-rw-r--r--  gcc-4.9/gcc/config/sh/divtab-sh4.c | 85
-rw-r--r--  gcc-4.9/gcc/config/sh/divtab.c | 203
-rw-r--r--  gcc-4.9/gcc/config/sh/elf.h | 88
-rw-r--r--  gcc-4.9/gcc/config/sh/embed-elf.h | 36
-rw-r--r--  gcc-4.9/gcc/config/sh/iterators.md | 46
-rw-r--r--  gcc-4.9/gcc/config/sh/linux.h | 152
-rw-r--r--  gcc-4.9/gcc/config/sh/little.h | 21
-rw-r--r--  gcc-4.9/gcc/config/sh/netbsd-elf.h | 106
-rw-r--r--  gcc-4.9/gcc/config/sh/newlib.h | 29
-rw-r--r--  gcc-4.9/gcc/config/sh/predicates.md | 1201
-rw-r--r--  gcc-4.9/gcc/config/sh/rtems.h | 26
-rw-r--r--  gcc-4.9/gcc/config/sh/rtemself.h | 26
-rw-r--r--  gcc-4.9/gcc/config/sh/sh-c.c | 148
-rw-r--r--  gcc-4.9/gcc/config/sh/sh-mem.cc | 610
-rw-r--r--  gcc-4.9/gcc/config/sh/sh-modes.def | 34
-rw-r--r--  gcc-4.9/gcc/config/sh/sh-protos.h | 235
-rw-r--r--  gcc-4.9/gcc/config/sh/sh.c | 13504
-rw-r--r--  gcc-4.9/gcc/config/sh/sh.h | 2311
-rw-r--r--  gcc-4.9/gcc/config/sh/sh.md | 15960
-rw-r--r--  gcc-4.9/gcc/config/sh/sh.opt | 362
-rw-r--r--  gcc-4.9/gcc/config/sh/sh1.md | 85
-rw-r--r--  gcc-4.9/gcc/config/sh/sh4-300.md | 281
-rw-r--r--  gcc-4.9/gcc/config/sh/sh4.md | 454
-rw-r--r--  gcc-4.9/gcc/config/sh/sh4a.md | 234
-rw-r--r--  gcc-4.9/gcc/config/sh/sh64.h | 22
-rw-r--r--  gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc | 473
-rw-r--r--  gcc-4.9/gcc/config/sh/sh_treg_combine.cc | 1509
-rw-r--r--  gcc-4.9/gcc/config/sh/shmedia.h | 30
-rw-r--r--  gcc-4.9/gcc/config/sh/shmedia.md | 94
-rw-r--r--  gcc-4.9/gcc/config/sh/sshmedia.h | 78
-rw-r--r--  gcc-4.9/gcc/config/sh/superh.h | 104
-rw-r--r--  gcc-4.9/gcc/config/sh/superh.opt | 10
-rw-r--r--  gcc-4.9/gcc/config/sh/sync.md | 1388
-rw-r--r--  gcc-4.9/gcc/config/sh/t-linux | 2
-rw-r--r--  gcc-4.9/gcc/config/sh/t-netbsd-sh5-64 | 1
-rw-r--r--  gcc-4.9/gcc/config/sh/t-rtems | 7
-rw-r--r--  gcc-4.9/gcc/config/sh/t-sh | 101
-rw-r--r--  gcc-4.9/gcc/config/sh/t-sh64 | 22
-rw-r--r--  gcc-4.9/gcc/config/sh/t-vxworks | 6
-rw-r--r--  gcc-4.9/gcc/config/sh/ushmedia.h | 1091
-rw-r--r--  gcc-4.9/gcc/config/sh/vxworks.h | 66
44 files changed, 41730 insertions, 0 deletions
diff --git a/gcc-4.9/gcc/config/sh/constraints.md b/gcc-4.9/gcc/config/sh/constraints.md
new file mode 100644
index 000000000..17a448fc0
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/constraints.md
@@ -0,0 +1,324 @@
+;; Constraint definitions for Renesas / SuperH SH.
+;; Copyright (C) 2007-2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Overview of uppercase letter constraints:
+;; Bxx: miscellaneous constraints
+;; Bsc: SCRATCH - for the scratch register in movsi_ie in the
+;; fldi0 / fldi1 cases
+;; Cxx: Constants other than only CONST_INT
+;; Css: signed 16-bit constant, literal or symbolic
+;; Csu: unsigned 16-bit constant, literal or symbolic
+;; Csy: label or symbol
+;; Cpg: non-explicit constants that can be directly loaded into a general
+;; purpose register in PIC code. Like 's' except we don't allow
+;; PIC_ADDR_P
+;; IJKLMNOP: CONST_INT constants
+;; Ixx: signed xx bit
+;; J16: 0xffffffff00000000 | 0x00000000ffffffff
+;; Jmb: 0x000000FF
+;; Jmw: 0x0000FFFF
+;; Jhb: 0x80000000
+;; Kxx: unsigned xx bit
+;; M: 1
+;; N: 0
+;; P27: 1 | 2 | 8 | 16
+;; Pso: 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128
+;; Psz: ~1 | ~2 | ~4 | ~8 | ~16 | ~32 | ~64 | ~128
+;; G: Floating point 0
+;; H: Floating point 1
+;; Q: pc relative load operand
+;; Rxx: reserved for exotic register classes.
+;; Sxx: extra memory constraints
+;; Sua: unaligned memory address
+;; Sbv: QImode address without displacement
+;; Sbw: QImode address with 12 bit displacement
+;; Snd: address without displacement
+;; Sdd: address with displacement
+;; Sra: simple register address
+;; W: vector
+;; Z: zero in any mode
+;;
+;; unused CONST_INT constraint letters: LO
+;; unused EXTRA_CONSTRAINT letters: D T U Y
+
+;; Register constraints
+(define_register_constraint "a" "ALL_REGS"
+ "@internal")
+
+(define_register_constraint "b" "TARGET_REGS"
+ "Branch target registers.")
+
+(define_register_constraint "c" "FPSCR_REGS"
+ "Floating-point status register.")
+
+(define_register_constraint "d" "DF_REGS"
+ "Double precision floating-point register.")
+
+(define_register_constraint "e" "TARGET_FMOVD ? NO_REGS : FP_REGS"
+ "Floating-point register.")
+
+(define_register_constraint "f" "FP_REGS"
+ "Floating-point register.")
+
+(define_register_constraint "k" "SIBCALL_REGS"
+ "@internal")
+
+(define_register_constraint "l" "PR_REGS"
+ "PR register.")
+
+(define_register_constraint "t" "T_REGS"
+ "T register.")
+
+(define_register_constraint "u" "NON_SP_REGS"
+ "Non-stack-pointer register.")
+
+(define_register_constraint "w" "FP0_REGS"
+ "Floating-point register 0.")
+
+(define_register_constraint "x" "MAC_REGS"
+ "MACH and MACL registers.")
+
+(define_register_constraint "y" "FPUL_REGS"
+ "FPUL register.")
+
+(define_register_constraint "z" "R0_REGS"
+ "R0 register.")
+
+;; Integer constraints
+(define_constraint "I06"
+ "A signed 6-bit constant, as used in SHmedia beqi, bnei and xori."
+ (and (match_code "const_int")
+ (match_test "ival >= -32 && ival <= 31")))
+
+(define_constraint "I08"
+ "A signed 8-bit constant, as used in add, sub, etc."
+ (and (match_code "const_int")
+ (match_test "ival >= -128 && ival <= 127")))
+
+(define_constraint "I10"
+ "A signed 10-bit constant, as used in SHmedia andi, ori."
+ (and (match_code "const_int")
+ (match_test "ival >= -512 && ival <= 511")))
+
+(define_constraint "I16"
+ "A signed 16-bit constant, as used in SHmedia movi."
+ (and (match_code "const_int")
+ (match_test "ival >= -32768 && ival <= 32767")))
+
+(define_constraint "I20"
+ "A signed 20-bit constant, as used in SH2A movi20."
+ (and (match_code "const_int")
+ (match_test "ival >= -524288 && ival <= 524287")
+ (match_test "TARGET_SH2A")))
+
+(define_constraint "I28"
+ "A signed 28-bit constant, as used in SH2A movi20s."
+ (and (match_code "const_int")
+ (match_test "ival >= -134217728 && ival <= 134217727")
+ (match_test "(ival & 255) == 0")
+ (match_test "TARGET_SH2A")))
+
+(define_constraint "J16"
+ "0xffffffff00000000 or 0x00000000ffffffff."
+ (and (match_code "const_int")
+ (match_test "CONST_OK_FOR_J16 (ival)")))
+
+(define_constraint "Jmb"
+ "Low byte mask constant 0x000000FF"
+ (and (match_code "const_int")
+ (match_test "ival == 0xFF")))
+
+(define_constraint "Jmw"
+ "Low word mask constant 0x0000FFFF"
+ (and (match_code "const_int")
+ (match_test "ival == 0xFFFF")))
+
+(define_constraint "Jhb"
+ "Highest bit constant"
+ (and (match_code "const_int")
+ (match_test "(ival & 0xFFFFFFFF) == 0x80000000")))
+
+(define_constraint "K03"
+ "An unsigned 3-bit constant, as used in SH2A bclr, bset, etc."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 7")))
+
+(define_constraint "K04"
+ "An unsigned 4-bit constant, as used in mov.b displacement addressing."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 15")))
+
+(define_constraint "K05"
+ "An unsigned 5-bit constant, as used in mov.w displacement addressing."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 31")))
+
+(define_constraint "K08"
+ "An unsigned 8-bit constant, as used in and, or, etc."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 255")))
+
+(define_constraint "K12"
+ "An unsigned 12-bit constant, as used in SH2A 12-bit mov.b displacement
+ addressing."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 4095")))
+
+(define_constraint "K13"
+ "An unsigned 13-bit constant, as used in SH2A 12-bit mov.w displacement
+ addressing."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 8191")))
+
+(define_constraint "K16"
+ "An unsigned 16-bit constant, as used in SHmedia shori."
+ (and (match_code "const_int")
+ (match_test "ival >= 0 && ival <= 65535")))
+
+(define_constraint "P27"
+ "A constant allowed as a shift operand: 1, 2, 8 or 16."
+ (and (match_code "const_int")
+ (match_test "ival == 1 || ival == 2 || ival == 8 || ival == 16")))
+
+(define_constraint "M"
+ "Integer constant 1."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "N"
+ "Integer constant 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+;; Floating-point constraints
+(define_constraint "G"
+ "Double constant 0."
+ (and (match_code "const_double")
+ (match_test "fp_zero_operand (op) && fldi_ok ()")))
+
+(define_constraint "H"
+ "Double constant 1."
+ (and (match_code "const_double")
+ (match_test "fp_one_operand (op) && fldi_ok ()")))
+
+;; Extra constraints
+(define_constraint "Q"
+ "A pc relative load operand."
+ (and (match_code "mem")
+ (match_test "GET_MODE (op) != QImode")
+ (match_test "IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))")))
+
+(define_constraint "Bsc"
+ "Constraint for selecting FLDI0 or FLDI1 instruction. If the clobber
+ operand is not SCRATCH (i.e. REG) then R0 is probably being used,
+ hence mova is being used, hence do not select this pattern."
+ (match_code "scratch"))
+
+(define_constraint "Css"
+ "A signed 16-bit constant, literal or symbolic."
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC")
+ (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_EXTRACT_S16")))
+
+(define_constraint "Csu"
+ "An unsigned 16-bit constant, literal or symbolic."
+ (and (match_code "const")
+ (match_test "GET_CODE (XEXP (op, 0)) == UNSPEC")
+ (match_test "XINT (XEXP (op, 0), 1) == UNSPEC_EXTRACT_U16")))
+
+(define_constraint "Csy"
+ "A label or a symbol."
+ (ior (match_test "NON_PIC_REFERENCE_P (op)")
+ (match_test "PIC_ADDR_P (op)")))
+
+(define_constraint "Z"
+ "A zero in any shape or form."
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))
+
+(define_constraint "W"
+ "Any vector constant we can handle."
+ (and (match_code "const_vector")
+ (ior (match_test "sh_rep_vec (op, VOIDmode)")
+ (match_test "HOST_BITS_PER_WIDE_INT >= 64
+ ? sh_const_vec (op, VOIDmode)
+ : sh_1el_vec (op, VOIDmode)"))))
+
+(define_constraint "Cpg"
+ "A non-explicit constant that can be loaded directly into a general
+ purpose register. This is like 's' except we don't allow
+ PIC_ADDR_P."
+ (match_test "IS_NON_EXPLICIT_CONSTANT_P (op)"))
+
+(define_constraint "Pso"
+ "Integer constant with a single bit set in its lower 8 bits."
+ (and (match_code "const_int")
+ (ior (match_test "ival == 1")
+ (match_test "ival == 2")
+ (match_test "ival == 4")
+ (match_test "ival == 8")
+ (match_test "ival == 16")
+ (match_test "ival == 32")
+ (match_test "ival == 64")
+ (match_test "ival == 128"))))
+
+(define_constraint "Psz"
+ "Integer constant with a single zero bit in its lower 8 bits."
+ (and (match_code "const_int")
+ (ior (match_test "~ival == 1")
+ (match_test "~ival == 2")
+ (match_test "~ival == 4")
+ (match_test "~ival == 8")
+ (match_test "~ival == 16")
+ (match_test "~ival == 32")
+ (match_test "~ival == 64")
+ (match_test "~ival == 128"))))
+
+(define_memory_constraint "Sua"
+ "@internal"
+ (and (match_test "memory_operand (op, GET_MODE (op))")
+ (match_test "GET_CODE (XEXP (op, 0)) != PLUS")))
+
+(define_memory_constraint "Sdd"
+ "A memory reference that uses displacement addressing."
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
+ (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+ (match_test "CONST_INT_P (XEXP (XEXP (op, 0), 1))")))
+
+(define_memory_constraint "Snd"
+ "A memory reference that excludes displacement addressing."
+ (and (match_code "mem")
+ (match_test "! satisfies_constraint_Sdd (op)")))
+
+(define_memory_constraint "Sbv"
+ "A memory reference, as used in SH2A bclr.b, bset.b, etc."
+ (and (match_test "MEM_P (op) && GET_MODE (op) == QImode")
+ (match_test "REG_P (XEXP (op, 0))")))
+
+(define_memory_constraint "Sbw"
+ "A memory reference, as used in SH2A bclr.b, bset.b, etc."
+ (and (match_test "satisfies_constraint_Sdd (op)")
+ (match_test "GET_MODE (op) == QImode")
+ (match_test "satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1))")))
+
+(define_memory_constraint "Sra"
+ "A memory reference that uses simple register addressing."
+ (and (match_test "MEM_P (op)")
+ (match_test "REG_P (XEXP (op, 0))")))
+
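As a reading aid for the integer constraints above, here is a minimal C sketch of a few of the range checks (hypothetical helper names, not GCC code); the I/K prefixes follow the signed/unsigned convention described in the overview:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative equivalents of some constraint tests above.  */
static bool is_I08 (int64_t i) { return i >= -128 && i <= 127; }        /* add/sub imm8    */
static bool is_I20 (int64_t i) { return i >= -524288 && i <= 524287; }  /* SH2A movi20     */
static bool is_K08 (int64_t i) { return i >= 0 && i <= 255; }           /* and/or/xor imm8 */
/* Pso: exactly one bit set in the low 8 bits; Psz: its bitwise complement.  */
static bool is_Pso (int64_t i) { return i > 0 && i <= 128 && (i & (i - 1)) == 0; }
static bool is_Psz (int64_t i) { return is_Pso (~i); }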
diff --git a/gcc-4.9/gcc/config/sh/divcost-analysis b/gcc-4.9/gcc/config/sh/divcost-analysis
new file mode 100644
index 000000000..9fb6e6fa5
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/divcost-analysis
@@ -0,0 +1,88 @@
+Analysis of cycle costs for SH4:
+
+-> udiv_le128: 5
+-> udiv_ge64k: 6
+-> udiv udiv_25: 10
+-> pos_divisor: 3
+-> pos_result linear: 5
+-> pos_result - -: 5
+-> div_le128: 7
+-> div_ge64k: 9
+sdivsi3 -> udiv_25 13
+udiv25 -> div_ge64k_end: 15
+div_ge64k_end -> rts: 13
+div_le128 -> div_le128_2: 2, r1 latency 3
+udiv_le128 -> div_le128_2: 2, r1 latency 3
+(u)div_le128 -> div_by_1: 9
+(u)div_le128 -> rts: 17
+div_by_1(_neg) -> rts: 4
+div_ge64k -> div_r8: 2
+div_ge64k -> div_ge64k_2: 3
+udiv_ge64k -> udiv_r8: 3
+udiv_ge64k -> div_ge64k_2: 3 + LS
+(u)div_ge64k -> div_ge64k_end: 13
+div_r8 -> div_r8_2: 2
+udiv_r8 -> div_r8_2: 2 + LS
+(u)div_r8 -> rts: 21
+
+-> - + neg_result: 5
+-> + - neg_result: 5
+-> div_le128_neg: 7
+-> div_ge64k_neg: 9
+-> div_r8_neg: 11
+-> <64k div_ge64k_neg_end: 28
+-> >=64k div_ge64k_neg_end: 22
+div_ge64k_neg_end ft -> rts: 14
+div_r8_neg_end -> rts: 4
+div_r8_neg -> div_r8_neg_end: 18
+div_le128_neg -> div_by_1_neg: 4
+div_le128_neg -> rts 18
+
+ sh4-200 absolute divisor range:
+ 1 [2..128] [129..64K) [64K..|dividend|/256] >=64K,>|dividend/256|
+udiv 18 22 38 32 30
+sdiv pos: 20 24 41 35 32
+sdiv neg: 15 25 42 36 33
+
+ sh4-300 absolute divisor range:
+ 8 bit 16 bit 24 bit > 24 bit
+udiv 15 35 28 25
+sdiv 14 36 34 31
+
+
+fp-based:
+
+unsigned: 42 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
+signed: 33 + 3 + 3 (lingering ftrc latency + sts fpul,rx) at caller's site
+
+call-div1: divisor range:
+ [1..64K) >= 64K
+unsigned: 63 58
+signed: 76 76
+
+SFUNC_STATIC call overhead:
+mov.l 0f,r1
+bsrf r1
+
+SFUNC_GOT call overhead - current:
+mov.l 0f,r1
+mova 0f,r0
+mov.l 1f,r2
+add r1,r0
+mov.l @(r0,r2),r0
+jmp @r0
+; 3 cycles worse than SFUNC_STATIC
+
+SFUNC_GOT call overhead - improved assembler:
+mov.l 0f,r1
+mova 0f,r0
+mov.l @(r0,r1),r0
+jmp @r0
+; 2 cycles worse than SFUNC_STATIC
+
+
+Copyright (C) 2006-2014 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
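As a reading aid for the SH4-200 table above, a minimal C sketch that returns the tabulated unsigned-division cycle counts by divisor range (hypothetical helper, transcribed from the udiv row; not part of GCC or libgcc):

/* Approximate SH4-200 cycle count for the software udivsi3 paths,
   following the divisor ranges in the table above.  */
static int
sh4_200_udiv_cycles (unsigned divisor, unsigned dividend)
{
  if (divisor == 1)
    return 18;
  if (divisor <= 128)
    return 22;
  if (divisor < 65536)
    return 38;                        /* [129..64K)                   */
  if (divisor <= dividend / 256)
    return 32;                        /* [64K..|dividend|/256]        */
  return 30;                          /* >= 64K and > |dividend|/256  */
}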
diff --git a/gcc-4.9/gcc/config/sh/divtab-sh4-300.c b/gcc-4.9/gcc/config/sh/divtab-sh4-300.c
new file mode 100644
index 000000000..4941626a8
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/divtab-sh4-300.c
@@ -0,0 +1,77 @@
+/* Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Calculate division table for ST40-300 integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@st.com */
+
+#include <stdio.h>
+#include <math.h>
+
+int
+main ()
+{
+ int i, j;
+ double q, r, err, max_err = 0, max_s_err = 0;
+
+ puts("/* This table has been generated by divtab-sh4-300.c. */");
+ puts ("\t.balign 4");
+ for (i = -128; i < 128; i++)
+ {
+ int n = 0;
+ if (i == 0)
+ {
+ /* output some dummy number for 1/0. */
+ puts ("LOCAL(div_table_clz):\n\t.byte\t0");
+ continue;
+ }
+ for (j = i < 0 ? -i : i; j < 128; j += j)
+ n++;
+ printf ("\t.byte\t%d\n", n - 7);
+ }
+ puts("\
+/* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,\n\
+ or in bit 33 for powers of two. */\n\
+ .balign 4");
+ for (i = -128; i < 128; i++)
+ {
+ if (i == 0)
+ {
+ puts ("LOCAL(div_table_inv):\n\t.long\t0x0");
+ continue;
+ }
+ j = i < 0 ? -i : i;
+ while (j < 64)
+ j += j;
+ q = 4.*(1<<30)*128/j;
+ r = ceil (q);
+ printf ("\t.long\t0x%X\n", (unsigned) r);
+ err = r - q;
+ if (err > max_err)
+ max_err = err;
+ err = err * j / 128;
+ if (err > max_s_err)
+ max_s_err = err;
+ }
+ printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err);
+ exit (0);
+}
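To make the table layout above concrete, a small standalone sketch (not part of the generator) that reproduces the two entries for divisor 3: the shift byte comes from doubling |d| up to 128, and the inverse word is the rounded, scaled reciprocal of |d| normalized into [64,128), with the implicit leading 1 in bit 32 dropped:

#include <stdio.h>
#include <math.h>

int
main (void)
{
  int d = 3, n = 0, j;

  for (j = d; j < 128; j += j)    /* same doubling count as the generator */
    n++;
  printf ("shift byte: %d\n", n - 7);            /* prints -1 for d == 3    */

  for (j = d; j < 64; j += j)     /* normalize into [64,128): j becomes 96  */
    ;
  double q = 4. * (1 << 30) * 128 / j;           /* 2^32 * 128/96           */
  /* Go through unsigned long long so only the low 32 bits are kept.        */
  printf ("inverse: 0x%X\n", (unsigned) (unsigned long long) ceil (q));
  return 0;                                      /* prints 0x55555556       */
}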
diff --git a/gcc-4.9/gcc/config/sh/divtab-sh4.c b/gcc-4.9/gcc/config/sh/divtab-sh4.c
new file mode 100644
index 000000000..421571e1e
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/divtab-sh4.c
@@ -0,0 +1,85 @@
+/* Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* Calculate division table for SH2..4 integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@superh.com */
+
+#include <stdio.h>
+#include <math.h>
+
+int
+main ()
+{
+ int i, j;
+ double q, r, err, max_err = 0, max_s_err = 0;
+
+ puts("/* This table has been generated by divtab-sh4.c. */");
+ puts ("\t.balign 4");
+ puts ("LOCAL(div_table_clz):");
+ /* output some dummy number for 1/0. */
+ printf ("\t.byte\t%d\n", 0);
+ for (i = 1; i <= 128; i++)
+ {
+ int n = 0;
+ if (i == 128)
+ puts ("\
+/* Lookup table translating positive divisor to index into table of\n\
+ normalized inverse. N.B. the '0' entry is also the last entry of the\n\
+ previous table, and causes an unaligned access for division by zero. */\n\
+LOCAL(div_table_ix):");
+ for (j = i; j <= 128; j += j)
+ n++;
+ printf ("\t.byte\t%d\n", n - 7);
+ }
+ for (i = 1; i <= 128; i++)
+ {
+ j = i < 0 ? -i : i;
+ while (j < 128)
+ j += j;
+ printf ("\t.byte\t%d\n", j * 2 - 96*4);
+ }
+ puts("\
+/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */\n\
+ .balign 4\n\
+LOCAL(zero_l):");
+ for (i = 64; i < 128; i++)
+ {
+ if (i == 96)
+ puts ("LOCAL(div_table):");
+ q = 4.*(1<<30)*128/i;
+ r = ceil (q);
+ /* The value for 64 is actually scaled differently than it would
+ appear from this calculation.  The implicit part is %01, not 10.
+ Since the value in the table is 0 either way, this doesn't
+ matter here; still, the 1/64 entry is effectively a 1/128
+ entry.  */
+ printf ("\t.long\t0x%X\n", (unsigned) r);
+ err = r - q;
+ if (err > max_err)
+ max_err = err;
+ err = err * i / 128;
+ if (err > max_s_err)
+ max_s_err = err;
+ }
+ printf ("\t/* maximum error: %f scaled: %f*/\n", max_err, max_s_err);
+ exit (0);
+}
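The index bytes emitted above (j * 2 - 96*4) appear to be byte offsets from LOCAL(div_table) into the table of normalized inverses: the inverse for a normalized divisor j/2 is entry j/2 - 64 of 4 bytes each starting at LOCAL(zero_l), and LOCAL(div_table) sits 32 entries in, so the offset works out to 2*j - 384. A tiny sketch (not generator code) checking that identity over the even normalized range:

#include <assert.h>

int
main (void)
{
  for (int j = 128; j < 256; j += 2)
    {
      int entry = j / 2 - 64;                  /* entry number from zero_l */
      int offset = entry * 4 - 32 * 4;         /* div_table = zero_l + 128 */
      assert (offset == j * 2 - 96 * 4);       /* matches the emitted byte */
    }
  return 0;
}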
diff --git a/gcc-4.9/gcc/config/sh/divtab.c b/gcc-4.9/gcc/config/sh/divtab.c
new file mode 100644
index 000000000..40a26eb74
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/divtab.c
@@ -0,0 +1,203 @@
+/* Copyright (C) 2003-2014 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* Calculate division table for SH5Media integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@superh.com */
+
+#include <stdio.h>
+#include <math.h>
+
+#define BITS 5
+#define N_ENTRIES (1 << BITS)
+#define CUTOFF_BITS 20
+
+#define BIAS (-330)
+
+double max_defect = 0.;
+double max_defect_x;
+
+double min_defect = 1e9;
+double min_defect_x;
+
+double max_defect2 = 0.;
+double max_defect2_x;
+
+double min_defect2 = 0.;
+double min_defect2_x;
+
+double min_defect3 = 1e9;
+double min_defect3_x;
+int min_defect3_val;
+
+double max_defect3 = 0.;
+double max_defect3_x;
+int max_defect3_val;
+
+static double
+note_defect3 (int val, double d2, double y2d, double x)
+{
+ int cutoff_val = val >> CUTOFF_BITS;
+ double cutoff;
+ double defect;
+
+ if (val < 0)
+ cutoff_val++;
+ cutoff = (cutoff_val * (1<<CUTOFF_BITS) - val) * y2d;
+ defect = cutoff + val * d2;
+ if (val < 0)
+ defect = - defect;
+ if (defect > max_defect3)
+ {
+ max_defect3 = defect;
+ max_defect3_x = x;
+ max_defect3_val = val;
+ }
+ if (defect < min_defect3)
+ {
+ min_defect3 = defect;
+ min_defect3_x = x;
+ min_defect3_val = val;
+ }
+}
+
+/* This function assumes 32-bit integers. */
+static double
+calc_defect (double x, int constant, int factor)
+{
+ double y0 = (constant - (int) floor ((x * factor * 64.))) / 16384.;
+ double y1 = 2 * y0 -y0 * y0 * (x + BIAS / (1.*(1LL<<30)));
+ double y2d0, y2d;
+ int y2d1;
+ double d, d2;
+
+ y1 = floor (y1 * (1024 * 1024 * 1024)) / (1024 * 1024 * 1024);
+ d = y1 - 1 / x;
+ if (d > max_defect)
+ {
+ max_defect = d;
+ max_defect_x = x;
+ }
+ if (d < min_defect)
+ {
+ min_defect = d;
+ min_defect_x = x;
+ }
+ y2d0 = floor (y1 * x * (1LL << 60-16));
+ y2d1 = (int) (long long) y2d0;
+ y2d = - floor ((y1 - y0 / (1<<30-14)) * y2d1) / (1LL<<44);
+ d2 = y1 + y2d - 1/x;
+ if (d2 > max_defect2)
+ {
+ max_defect2 = d2;
+ max_defect2_x = x;
+ }
+ if (d2 < min_defect2)
+ {
+ min_defect2 = d2;
+ min_defect2_x = x;
+ }
+ /* zero times anything is trivially zero. */
+ note_defect3 ((1 << CUTOFF_BITS) - 1, d2, y2d, x);
+ note_defect3 (1 << CUTOFF_BITS, d2, y2d, x);
+ note_defect3 ((1U << 31) - (1 << CUTOFF_BITS), d2, y2d, x);
+ note_defect3 ((1U << 31) - 1, d2, y2d, x);
+ note_defect3 (-1, d2, y2d, x);
+ note_defect3 (-(1 << CUTOFF_BITS), d2, y2d, x);
+ note_defect3 ((1U << 31) - (1 << CUTOFF_BITS) + 1, d2, y2d, x);
+ note_defect3 (-(1U << 31), d2, y2d, x);
+ return d;
+}
+
+int
+main ()
+{
+ int i;
+ unsigned char factors[N_ENTRIES];
+ short constants[N_ENTRIES];
+ int steps = N_ENTRIES / 2;
+ double step = 1. / steps;
+ double eps30 = 1. / (1024 * 1024 * 1024);
+
+ for (i = 0; i < N_ENTRIES; i++)
+ {
+ double x_low = (i < steps ? 1. : -3.) + i * step;
+ double x_high = x_low + step - eps30;
+ double x_med;
+ int factor, constant;
+ double low_defect, med_defect, high_defect, max_defect;
+
+ factor = (1./x_low- 1./x_high) / step * 256. + 0.5;
+ if (factor == 256)
+ factor = 255;
+ factors[i] = factor;
+ /* Use minimum of error function for x_med. */
+ x_med = sqrt (256./factor);
+ if (x_low < 0)
+ x_med = - x_med;
+ low_defect = 1. / x_low + x_low * factor / 256.;
+ high_defect = 1. / x_high + x_high * factor / 256.;
+ med_defect = 1. / x_med + x_med * factor / 256.;
+ max_defect
+ = ((low_defect > high_defect) ^ (x_med < 0)) ? low_defect : high_defect;
+ constant = (med_defect + max_defect) * 0.5 * 16384. + 0.5;
+ if (constant < -32768 || constant > 32767)
+ abort ();
+ constants[i] = constant;
+ calc_defect (x_low, constant, factor);
+ calc_defect (x_med, constant, factor);
+ calc_defect (x_high, constant, factor);
+ }
+ printf ("/* This table has been generated by divtab.c .\n");
+ printf ("Defects for bias %d:\n", BIAS);
+ printf (" Max defect: %e at %e\n", max_defect, max_defect_x);
+ printf (" Min defect: %e at %e\n", min_defect, min_defect_x);
+ printf (" Max 2nd step defect: %e at %e\n", max_defect2, max_defect2_x);
+ printf (" Min 2nd step defect: %e at %e\n", min_defect2, min_defect2_x);
+ printf (" Max div defect: %e at %d:%e\n", max_defect3, max_defect3_val,
+ max_defect3_x);
+ printf (" Min div defect: %e at %d:%e\n", min_defect3, min_defect3_val,
+ min_defect3_x);
+ printf (" Defect at 1: %e\n",
+ calc_defect (1., constants[0], factors[0]));
+ printf (" Defect at -2: %e */\n",
+ calc_defect (-2., constants[steps], factors[steps]));
+ printf ("\t.section\t.rodata\n");
+ printf ("\t.balign 2\n");
+ printf ("/* negative division constants */\n");
+ for (i = steps; i < 2 * steps; i++)
+ printf ("\t.word\t%d\n", constants[i]);
+ printf ("/* negative division factors */\n");
+ for (i = steps; i < 2*steps; i++)
+ printf ("\t.byte\t%d\n", factors[i]);
+ printf ("\t.skip %d\n", steps);
+ printf ("\t.global GLOBAL(div_table):\n");
+ printf ("GLOBAL(div_table):\n");
+ printf ("\t.skip %d\n", steps);
+ printf ("/* positive division factors */\n");
+ for (i = 0; i < steps; i++)
+ printf ("\t.byte\t%d\n", factors[i]);
+ printf ("/* positive division constants */\n");
+ for (i = 0; i < steps; i++)
+ printf ("\t.word\t%d\n", constants[i]);
+ exit (0);
+}
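The table produced above seeds a reciprocal approximation: calc_defect's y0 is the piecewise-linear estimate (constant - x*factor*64)/16384, and y1 = 2*y0 - y0^2*x is one Newton-Raphson step, which roughly doubles the number of correct bits. A minimal floating-point sketch of that step (illustrative only; it ignores the BIAS term and the fixed-point rounding in the code above):

#include <stdio.h>

int
main (void)
{
  double x  = 1.25;                    /* divisor scaled into the table's range     */
  double y0 = 0.78;                    /* coarse table estimate of 1/x = 0.8        */
  double y1 = y0 * (2.0 - x * y0);     /* Newton step, algebraically 2*y0 - y0*y0*x */
  printf ("y0 error %.1e, y1 error %.1e\n", y0 - 1 / x, y1 - 1 / x);
  return 0;                            /* error drops from ~2e-2 to ~5e-4           */
}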
diff --git a/gcc-4.9/gcc/config/sh/elf.h b/gcc-4.9/gcc/config/sh/elf.h
new file mode 100644
index 000000000..24b5c9815
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/elf.h
@@ -0,0 +1,88 @@
+/* Definitions of target machine for gcc for Renesas / SuperH SH using ELF.
+ Copyright (C) 1996-2014 Free Software Foundation, Inc.
+ Contributed by Ian Lance Taylor <ian@cygnus.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Let sh.c know this is ELF. */
+#undef TARGET_ELF
+#define TARGET_ELF 1
+
+/* Generate DWARF2 debugging information and make it the default. */
+#define DWARF2_DEBUGGING_INFO 1
+
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+/* Use a more compact format for line information. */
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+#undef WCHAR_TYPE
+/* #define WCHAR_TYPE (TARGET_SH5 ? "int" : "long int") */
+#define WCHAR_TYPE SH_ELF_WCHAR_TYPE
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* The prefix to add to user-visible assembler symbols. */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+#undef SIZE_TYPE
+#define SIZE_TYPE (TARGET_SH5 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_SH5 ? "long int" : "int")
+
+/* Pass -ml and -mrelax to the assembler and linker. */
+#undef ASM_SPEC
+#define ASM_SPEC SH_ASM_SPEC
+
+#undef LINK_SPEC
+#define LINK_SPEC SH_LINK_SPEC
+#undef LINK_EMUL_PREFIX
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define LINK_EMUL_PREFIX "sh%{!mb:l}elf"
+#else
+#define LINK_EMUL_PREFIX "sh%{ml:l}elf"
+#endif
+
+#define DBX_REGISTER_NUMBER(REGNO) SH_DBX_REGISTER_NUMBER (REGNO)
+
+#undef ASM_GENERATE_INTERNAL_LABEL
+#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
+ sprintf ((STRING), "*%s%s%ld", LOCAL_LABEL_PREFIX, (PREFIX), (long)(NUM))
+
+#define DBX_LINES_FUNCTION_RELATIVE 1
+#define DBX_OUTPUT_NULL_N_SO_AT_MAIN_SOURCE_FILE_END
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: crt1.o%s} crti.o%s \
+ %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC \
+ "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc"
+
+/* ASM_OUTPUT_CASE_LABEL is defined in elfos.h. With it,
+ a redundant .align was generated. */
+#undef ASM_OUTPUT_CASE_LABEL
diff --git a/gcc-4.9/gcc/config/sh/embed-elf.h b/gcc-4.9/gcc/config/sh/embed-elf.h
new file mode 100644
index 000000000..380425751
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/embed-elf.h
@@ -0,0 +1,36 @@
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH
+ non-Linux embedded targets.
+ Copyright (C) 2002-2014 Free Software Foundation, Inc.
+ Contributed by J"orn Rennecke <joern.rennecke@superh.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef USER_LABEL_PREFIX
+#define USER_LABEL_PREFIX "_"
+
+/* While the speed-optimized implementations of udivsi3_i4i / sdivsi3_i4i
+ in libgcc are not available for SH2, the space-optimized ones in
+ libgcc-Os-4-200 are. Thus, when not optimizing for space, link
+ libgcc-Os-4-200 after libgcc, so that -mdiv=call-table works for -m2. */
+#define LIBGCC_SPEC "%{!shared: \
+ %{m4-100*:-lic_invalidate_array_4-100} \
+ %{m4-200*:-lic_invalidate_array_4-200} \
+ %{m4-300*|m4-340:-lic_invalidate_array_4a %{!Os: -lgcc-4-300}} \
+ %{m4a*:-lic_invalidate_array_4a}} \
+ %{Os: -lgcc-Os-4-200} \
+ -lgcc \
+ %{!Os: -lgcc-Os-4-200}"
diff --git a/gcc-4.9/gcc/config/sh/iterators.md b/gcc-4.9/gcc/config/sh/iterators.md
new file mode 100644
index 000000000..5f020c72a
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/iterators.md
@@ -0,0 +1,46 @@
+;; Iterator definitions for GCC SH machine description files.
+;; Copyright (C) 2012-2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_mode_iterator QIHISIDI [QI HI SI DI])
+(define_mode_iterator QIHISI [QI HI SI])
+(define_mode_iterator QIHI [QI HI])
+(define_mode_iterator HISI [HI SI])
+(define_mode_iterator SIDI [SI DI])
+
+;; Mode attributes that can be used as the instruction suffix for mode
+;; variant instructions.
+(define_mode_attr bw [(QI "b") (HI "w")])
+(define_mode_attr bwl [(QI "b") (HI "w") (SI "l")])
+
+;; Sign/zero-extension code iterator.
+(define_code_iterator SZ_EXTEND [sign_extend zero_extend])
+
+;; Mode attributes for mov.b and mov.w displacement constraints.
+(define_mode_attr disp04 [(QI "K04") (HI "K05")])
+(define_mode_attr disp12 [(QI "K12") (HI "K13")])
+
+;; Return codes.
+(define_code_iterator any_return [return simple_return])
+
+;; Lowpart subreg byte position code attributes for big and little endian.
+(define_mode_attr lowpart_be [(QI "3") (HI "2")])
+(define_mode_attr lowpart_le [(QI "0") (HI "0")])
+
+;; Signed minimum/maximum code iterator.
+(define_code_iterator SMIN_SMAX [smin smax])
diff --git a/gcc-4.9/gcc/config/sh/linux.h b/gcc-4.9/gcc/config/sh/linux.h
new file mode 100644
index 000000000..c0a4ebd3e
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/linux.h
@@ -0,0 +1,152 @@
+/* Definitions for SH running Linux-based GNU systems using ELF
+ Copyright (C) 1999-2014 Free Software Foundation, Inc.
+ Contributed by Kazumoto Kojima <kkojima@rr.iij4u.or.jp>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+
+/* Enable DWARF 2 exceptions. */
+#undef DWARF2_UNWIND_INFO
+#define DWARF2_UNWIND_INFO 1
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "\
+ %{posix:-D_POSIX_SOURCE} \
+ %{pthread:-D_REENTRANT -D_PTHREADS} \
+"
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ } \
+ while (0)
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (TARGET_CPU_DEFAULT | TARGET_ENDIAN_DEFAULT | TARGET_OPT_DEFAULT)
+
+#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2"
+
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_linux"
+#undef SUBTARGET_LINK_SPEC
+#define SUBTARGET_LINK_SPEC \
+ "%{shared:-shared} \
+ %{!static: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "} \
+ %{static:-static}"
+
+/* Output assembler code to STREAM to call the profiler. */
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+ do { \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf (STREAM, "\tpt\t1f,tr1\n"); \
+ fprintf (STREAM, "\taddi.l\tr15,-8,r15\n"); \
+ fprintf (STREAM, "\tst.l\tr15,0,r18\n"); \
+ if (flag_pic) \
+ { \
+ const char *gofs = "(datalabel _GLOBAL_OFFSET_TABLE_-(0f-.))"; \
+ fprintf (STREAM, "\tmovi\t((%s>>16)&0xffff),r21\n", gofs); \
+ fprintf (STREAM, "\tshori\t(%s & 0xffff),r21\n", gofs); \
+ fprintf (STREAM, "0:\tptrel/u\tr21,tr0\n"); \
+ fprintf (STREAM, "\tmovi\t((mcount@GOTPLT)&0xffff),r22\n"); \
+ fprintf (STREAM, "\tgettr\ttr0,r21\n"); \
+ fprintf (STREAM, "\tadd.l\tr21,r22,r21\n"); \
+ fprintf (STREAM, "\tld.l\tr21,0,r21\n"); \
+ fprintf (STREAM, "\tptabs\tr21,tr0\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\tpt\tmcount,tr0\n"); \
+ fprintf (STREAM, "\tgettr\ttr1,r18\n"); \
+ fprintf (STREAM, "\tblink\ttr0,r63\n"); \
+ fprintf (STREAM, "1:\tld.l\tr15,0,r18\n"); \
+ fprintf (STREAM, "\taddi.l\tr15,8,r15\n"); \
+ } \
+ else \
+ { \
+ if (flag_pic) \
+ { \
+ fprintf (STREAM, "\tmov.l\t3f,r1\n"); \
+ fprintf (STREAM, "\tmova\t3f,r0\n"); \
+ fprintf (STREAM, "\tadd\tr1,r0\n"); \
+ fprintf (STREAM, "\tmov.l\t1f,r1\n"); \
+ fprintf (STREAM, "\tmov.l\t@(r0,r1),r1\n"); \
+ } \
+ else \
+ fprintf (STREAM, "\tmov.l\t1f,r1\n"); \
+ fprintf (STREAM, "\tsts.l\tpr,@-r15\n"); \
+ fprintf (STREAM, "\tmova\t2f,r0\n"); \
+ fprintf (STREAM, "\tjmp\t@r1\n"); \
+ fprintf (STREAM, "\tlds\tr0,pr\n"); \
+ fprintf (STREAM, "\t.align\t2\n"); \
+ if (flag_pic) \
+ { \
+ fprintf (STREAM, "1:\t.long\tmcount@GOT\n"); \
+ fprintf (STREAM, "3:\t.long\t_GLOBAL_OFFSET_TABLE_\n"); \
+ } \
+ else \
+ fprintf (STREAM, "1:\t.long\tmcount\n"); \
+ fprintf (STREAM, "2:\tlds.l\t@r15+,pr\n"); \
+ } \
+ } while (0)
+
+/* For SH3 and SH4, we use a slot of the unwind frame which corresponds
+ to a fake register number 16 as a placeholder for the return address
+ in MD_FALLBACK_FRAME_STATE_FOR and its content will be read with
+ _Unwind_GetGR which uses dwarf_reg_size_table to get the size of
+ the register. So the entry of dwarf_reg_size_table corresponding to
+ this slot must be set. To do this, we redefine DBX_REGISTER_NUMBER
+ so as to return itself for 16. */
+#undef DBX_REGISTER_NUMBER
+#define DBX_REGISTER_NUMBER(REGNO) \
+ ((! TARGET_SH5 && (REGNO) == 16) ? 16 : SH_DBX_REGISTER_NUMBER (REGNO))
+
+/* Since libgcc is compiled with -fpic for this target, we can't use
+ __sdivsi3_1 as the division strategy for -O0 and -Os. */
+#undef SH_DIV_STRATEGY_DEFAULT
+#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL2
+#undef SH_DIV_STR_FOR_SIZE
+#define SH_DIV_STR_FOR_SIZE "call2"
+
+/* Install the __sync libcalls. */
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS sh_init_sync_libfuncs
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ /* Set default atomic model if it hasn't been specified. */ \
+ if (global_options_set.x_sh_atomic_model_str == 0) \
+ { \
+ if (TARGET_SH3) \
+ sh_atomic_model_str = "soft-gusa"; \
+ else if (TARGET_SH1) \
+ sh_atomic_model_str = "soft-imask"; \
+ } \
+ /* Set -musermode if it hasn't been specified. */ \
+ if (global_options_set.x_TARGET_USERMODE == 0) \
+ TARGET_USERMODE = true; \
+ } \
+ while (0)
diff --git a/gcc-4.9/gcc/config/sh/little.h b/gcc-4.9/gcc/config/sh/little.h
new file mode 100644
index 000000000..8ab61ea5a
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/little.h
@@ -0,0 +1,21 @@
+/* Definition of little endian SH machine for GNU compiler.
+
+ Copyright (C) 2002-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_ENDIAN_DEFAULT MASK_LITTLE_ENDIAN
diff --git a/gcc-4.9/gcc/config/sh/netbsd-elf.h b/gcc-4.9/gcc/config/sh/netbsd-elf.h
new file mode 100644
index 000000000..8100cee5d
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/netbsd-elf.h
@@ -0,0 +1,106 @@
+/* Definitions for SH running NetBSD using ELF
+ Copyright (C) 2002-2014 Free Software Foundation, Inc.
+ Contributed by Wasabi Systems, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Run-time Target Specification. */
+
+/* Extra specs needed for NetBSD SuperH ELF targets. */
+
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+ { "netbsd_entry_point", NETBSD_ENTRY_POINT },
+
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ NETBSD_OS_CPP_BUILTINS_ELF(); \
+ builtin_define ("__NO_LEADING_UNDERSCORES__"); \
+ } \
+ while (0)
+
+/* Provide a LINK_SPEC appropriate for a NetBSD/sh ELF target.
+ We use the SH_LINK_SPEC from sh/sh.h, and define the appropriate
+ SUBTARGET_LINK_SPEC that pulls in what we need from a generic
+ NetBSD ELF LINK_SPEC. */
+
+/* LINK_EMUL_PREFIX from sh/elf.h */
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_nbsd"
+
+#undef SUBTARGET_LINK_SPEC
+#define SUBTARGET_LINK_SPEC NETBSD_LINK_SPEC_ELF
+
+#undef LINK_SPEC
+#define LINK_SPEC SH_LINK_SPEC
+
+#define NETBSD_ENTRY_POINT "__start"
+
+/* Provide a CPP_SPEC appropriate for NetBSD. */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC
+
+#undef TARGET_DEFAULT
+#define TARGET_DEFAULT \
+ (TARGET_CPU_DEFAULT | TARGET_ENDIAN_DEFAULT)
+
+/* Define this because we use the label but do not need the counters. */
+#define NO_PROFILE_COUNTERS 1
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+do \
+ { \
+ if (TARGET_SHMEDIA32 || TARGET_SHMEDIA64) \
+ { \
+ /* FIXME */ \
+ sorry ("unimplemented-shmedia profiling"); \
+ } \
+ else \
+ { \
+ fprintf((STREAM), "\tmov.l\t%sLP%d,r1\n", \
+ LOCAL_LABEL_PREFIX, (LABELNO)); \
+ fprintf((STREAM), "\tmova\t%sLP%dr,r0\n", \
+ LOCAL_LABEL_PREFIX, (LABELNO)); \
+ fprintf((STREAM), "\tjmp\t@r1\n"); \
+ fprintf((STREAM), "\tnop\n"); \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ fprintf((STREAM), "%sLP%d:\t.long\t__mcount\n", \
+ LOCAL_LABEL_PREFIX, (LABELNO)); \
+ fprintf((STREAM), "%sLP%dr:\n", LOCAL_LABEL_PREFIX, (LABELNO)); \
+ } \
+ } \
+while (0)
+
+/* Since libgcc is compiled with -fpic for this target, we can't use
+ __sdivsi3_1 as the division strategy for -O0 and -Os. */
+#undef SH_DIV_STRATEGY_DEFAULT
+#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL2
+#undef SH_DIV_STR_FOR_SIZE
+#define SH_DIV_STR_FOR_SIZE "call2"
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ /* Set -musermode if it hasn't been specified. */ \
+ if (global_options_set.x_TARGET_USERMODE == 0) \
+ TARGET_USERMODE = true; \
+ } \
+ while (0)
diff --git a/gcc-4.9/gcc/config/sh/newlib.h b/gcc-4.9/gcc/config/sh/newlib.h
new file mode 100644
index 000000000..d3fcf150c
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/newlib.h
@@ -0,0 +1,29 @@
+/* Definitions of target machine for gcc for Super-H using sh-superh-elf.
+ Copyright (C) 2001-2014 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This header file is used when with_libgloss is enabled during gcc
+ configuration. */
+
+#undef LIB_SPEC
+#define LIB_SPEC "-lc -lgloss"
+
+#undef NO_IMPLICIT_EXTERN_C
+#define NO_IMPLICIT_EXTERN_C 1
+
diff --git a/gcc-4.9/gcc/config/sh/predicates.md b/gcc-4.9/gcc/config/sh/predicates.md
new file mode 100644
index 000000000..31f2e1f5a
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/predicates.md
@@ -0,0 +1,1201 @@
+;; Predicate definitions for Renesas / SuperH SH.
+;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; TODO: Add a comment here.
+(define_predicate "trapping_target_operand"
+ (match_code "if_then_else")
+{
+ rtx cond, mem, res, tar, and_expr;
+
+ if (GET_MODE (op) != PDImode)
+ return 0;
+ cond = XEXP (op, 0);
+ mem = XEXP (op, 1);
+ res = XEXP (op, 2);
+ if (!MEM_P (mem)
+ || (GET_CODE (res) != SIGN_EXTEND && GET_CODE (res) != TRUNCATE))
+ return 0;
+ tar = XEXP (res, 0);
+ if (!rtx_equal_p (XEXP (mem, 0), tar)
+ || GET_MODE (tar) != Pmode)
+ return 0;
+ if (GET_CODE (cond) == CONST)
+ {
+ cond = XEXP (cond, 0);
+ if (!satisfies_constraint_Csy (tar))
+ return 0;
+ if (GET_CODE (tar) == CONST)
+ tar = XEXP (tar, 0);
+ }
+ else if (!arith_reg_operand (tar, VOIDmode)
+ && ! satisfies_constraint_Csy (tar))
+ return 0;
+ if (GET_CODE (cond) != EQ)
+ return 0;
+ and_expr = XEXP (cond, 0);
+ return (GET_CODE (and_expr) == AND
+ && rtx_equal_p (XEXP (and_expr, 0), tar)
+ && CONST_INT_P (XEXP (and_expr, 1))
+ && CONST_INT_P (XEXP (cond, 1))
+ && INTVAL (XEXP (and_expr, 1)) == 3
+ && INTVAL (XEXP (cond, 1)) == 3);
+})
+
+;; A logical operand that can be used in an shmedia and insn.
+(define_predicate "and_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (logical_operand (op, mode))
+ return 1;
+
+ /* Check mshflo.l / mshflhi.l opportunities. */
+ if (TARGET_SHMEDIA
+ && mode == DImode
+ && satisfies_constraint_J16 (op))
+ return 1;
+
+ return 0;
+})
+
+;; Like arith_reg_dest, but this predicate is defined with
+;; define_special_predicate, not define_predicate.
+(define_special_predicate "any_arith_reg_dest"
+ (match_code "subreg,reg")
+{
+ return arith_reg_dest (op, mode);
+})
+
+;; Like register_operand, but this predicate is defined with
+;; define_special_predicate, not define_predicate.
+(define_special_predicate "any_register_operand"
+ (match_code "subreg,reg")
+{
+ return register_operand (op, mode);
+})
+
+;; Returns 1 if OP is a valid source operand for an arithmetic insn.
+(define_predicate "arith_operand"
+ (match_code "subreg,reg,const_int,truncate")
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (TARGET_SHMEDIA)
+ {
+ /* FIXME: We should be checking whether the CONST_INT fits in a
+ signed 16-bit here, but this causes reload_cse to crash when
+ attempting to transform a sequence of two 64-bit sets of the
+ same register from literal constants into a set and an add,
+ when the difference is too wide for an add. */
+ if (CONST_INT_P (op)
+ || satisfies_constraint_Css (op))
+ return 1;
+ else if (GET_CODE (op) == TRUNCATE
+ && REG_P (XEXP (op, 0))
+ && ! system_reg_operand (XEXP (op, 0), VOIDmode)
+ && (mode == VOIDmode || mode == GET_MODE (op))
+ && (GET_MODE_SIZE (GET_MODE (op))
+ < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
+ && (! FP_REGISTER_P (REGNO (XEXP (op, 0)))
+ || GET_MODE_SIZE (GET_MODE (op)) == 4))
+ return register_operand (XEXP (op, 0), VOIDmode);
+ else
+ return 0;
+ }
+ else if (satisfies_constraint_I08 (op))
+ return 1;
+
+ return 0;
+})
+
+;; Like above, but for DImode destinations: forbid paradoxical DImode
+;; subregs, because this would lead to missing sign extensions when
+;; truncating from DImode to SImode.
+(define_predicate "arith_reg_dest"
+ (match_code "subreg,reg")
+{
+ if (mode == DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
+ && TARGET_SHMEDIA)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns 1 if OP is a normal arithmetic register.
+(define_predicate "arith_reg_operand"
+ (match_code "subreg,reg,sign_extend")
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (REG_P (op))
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op)))
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno != T_REG && regno != PR_REG
+ && ! TARGET_REGISTER_P (regno)
+ && regno != FPUL_REG
+ && regno != MACH_REG && regno != MACL_REG);
+ }
+ /* Allow a no-op sign extension - compare LOAD_EXTEND_OP.
+ We allow SImode here, as not using an FP register is just a matter of
+ proper register allocation. */
+ if (TARGET_SHMEDIA
+ && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND
+ && GET_MODE (XEXP (op, 0)) == SImode
+ && GET_CODE (XEXP (op, 0)) != SUBREG)
+ return register_operand (XEXP (op, 0), VOIDmode);
+#if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. */
+ if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND
+ && GET_MODE (XEXP (op, 0)) == HImode
+ && REG_P (XEXP (op, 0))
+ && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG)
+ return register_operand (XEXP (op, 0), VOIDmode);
+#endif
+ if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT
+ && GET_CODE (op) == SUBREG
+ && GET_MODE (SUBREG_REG (op)) == DImode
+ && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND
+ && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode
+ && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG)
+ return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode);
+ return 0;
+})
+
+;; Returns 1 if OP is a valid source operand for a compare insn.
+(define_predicate "arith_reg_or_0_operand"
+ (match_code "subreg,reg,const_int,const_vector")
+{
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (satisfies_constraint_Z (op))
+ return 1;
+
+ return 0;
+})
+
+;; Returns true if OP is either a register or constant 0 or constant 1.
+(define_predicate "arith_reg_or_0_or_1_operand"
+ (match_code "subreg,reg,const_int,const_vector")
+{
+ return arith_reg_or_0_operand (op, mode) || satisfies_constraint_M (op);
+})
+
+;; Returns true if OP is a suitable constant for the minimum value of a
+;; clips.b or clips.w insn.
+(define_predicate "clips_min_const_int"
+ (and (match_code "const_int")
+ (ior (match_test "INTVAL (op) == -128")
+ (match_test "INTVAL (op) == -32768"))))
+
+;; Returns true if OP is a suitable constant for the maximum value of a
+;; clips.b or clips.w insn.
+(define_predicate "clips_max_const_int"
+ (and (match_code "const_int")
+ (ior (match_test "INTVAL (op) == 127")
+ (match_test "INTVAL (op) == 32767"))))
+
+;; Returns true if OP is a suitable constant for the maximum value of a
+;; clipu.b or clipu.w insn.
+(define_predicate "clipu_max_const_int"
+ (and (match_code "const_int")
+ (ior (match_test "INTVAL (op) == 255")
+ (match_test "INTVAL (op) == 65535"))))
+
+;; Returns 1 if OP is a floating point operator with two operands.
+(define_predicate "binary_float_operator"
+ (and (match_code "plus,minus,mult,div")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Returns 1 if OP is a logical operator with two operands.
+(define_predicate "binary_logical_operator"
+ (and (match_code "and,ior,xor")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Return 1 if OP is an address suitable for a cache manipulation operation.
+;; MODE has the meaning as in address_operand.
+(define_special_predicate "cache_address_operand"
+ (match_code "plus,reg")
+{
+ if (GET_CODE (op) == PLUS)
+ {
+ if (!REG_P (XEXP (op, 0)))
+ return 0;
+ if (!CONST_INT_P (XEXP (op, 1))
+ || (INTVAL (XEXP (op, 1)) & 31))
+ return 0;
+ }
+ else if (!REG_P (op))
+ return 0;
+ return address_operand (op, mode);
+})
+
+;; Returns 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu.
+(define_predicate "cmp_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (satisfies_constraint_N (op))
+ return 1;
+ if (TARGET_SHMEDIA
+ && mode != DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns true if OP is an operand that can be used as the first operand in
+;; the cstoresi4 expander pattern.
+(define_predicate "cmpsi_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (REG_P (op) && REGNO (op) == T_REG
+ && GET_MODE (op) == SImode
+ && TARGET_SH1)
+ return 1;
+ return arith_operand (op, mode);
+})
+
+;; Returns true if OP is a commutative float operator.
+;; This predicate is currently unused.
+;;(define_predicate "commutative_float_operator"
+;; (and (match_code "plus,mult")
+;; (match_test "GET_MODE (op) == mode")))
+
+;; Returns true if OP is an equal or not equal operator.
+(define_predicate "equality_comparison_operator"
+ (match_code "eq,ne"))
+
+;; Returns true if OP is an arithmetic operand that is zero extended during
+;; an operation.
+(define_predicate "extend_reg_operand"
+ (match_code "subreg,reg,truncate")
+{
+ return (GET_CODE (op) == TRUNCATE
+ ? arith_operand
+ : arith_reg_operand) (op, mode);
+})
+
+;; Like extend_reg_operand, but also allow a constant 0.
+(define_predicate "extend_reg_or_0_operand"
+ (match_code "subreg,reg,truncate,const_int")
+{
+ return (GET_CODE (op) == TRUNCATE
+ ? arith_operand
+ : arith_reg_or_0_operand) (op, mode);
+})
+
+;; Like arith_reg_operand, but this predicate does not accept SIGN_EXTEND.
+(define_predicate "ext_dest_operand"
+ (match_code "subreg,reg")
+{
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns true if OP can be used as a destination register for shmedia floating
+;; point to integer conversions.
+(define_predicate "fp_arith_reg_dest"
+ (match_code "subreg,reg")
+{
+ if (mode == DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
+ return 0;
+ return fp_arith_reg_operand (op, mode);
+})
+
+;; Returns true if OP is a floating point register that can be used in floating
+;; point arithmetic operations.
+(define_predicate "fp_arith_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (REG_P (op))
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && REG_P (SUBREG_REG (op)))
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno >= FIRST_PSEUDO_REGISTER
+ || FP_REGISTER_P (regno));
+ }
+ return 0;
+})
+
+;; Returns true if OP is the FPSCR.
+(define_predicate "fpscr_operand"
+ (match_code "reg")
+{
+ return (REG_P (op)
+ && (REGNO (op) == FPSCR_REG
+ || (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && !(reload_in_progress || reload_completed)))
+ && GET_MODE (op) == PSImode);
+})
+
+;; Returns true if OP is an operand that is either the fpul hard reg or
+;; a pseudo. This prevents combine from propagating function arguments
+;; in hard regs into insns that need the operand in fpul. If it's a pseudo
+;; reload can fix it up.
+(define_predicate "fpul_operand"
+ (match_code "reg")
+{
+ if (TARGET_SHMEDIA)
+ return fp_arith_reg_operand (op, mode);
+
+ return (REG_P (op)
+ && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ && GET_MODE (op) == mode);
+})
+
+;; Returns true if OP is a valid fpul input operand for the fsca insn.
+;; The value in fpul is a fixed-point value and its scaling is described
+;; in the fsca insn by a mult:SF. To allow pre-scaled fixed-point inputs
+;; in fpul we have to permit things like
+;; (reg:SI)
+;; (fix:SF (float:SF (reg:SI)))
+(define_predicate "fpul_fsca_operand"
+ (match_code "fix,reg")
+{
+ if (fpul_operand (op, SImode))
+ return true;
+ if (GET_CODE (op) == FIX && GET_MODE (op) == SImode
+ && GET_CODE (XEXP (op, 0)) == FLOAT && GET_MODE (XEXP (op, 0)) == SFmode)
+ return fpul_fsca_operand (XEXP (XEXP (op, 0), 0),
+ GET_MODE (XEXP (XEXP (op, 0), 0)));
+ return false;
+})
+
+;; Returns true if OP is a valid constant scale factor for the fsca insn.
+(define_predicate "fsca_scale_factor"
+ (and (match_code "const_double")
+ (match_test "op == sh_fsca_int2sf ()")))
+
+;; Returns true if OP is an operand that is zero extended during an operation.
+(define_predicate "general_extend_operand"
+ (match_code "subreg,reg,mem,truncate")
+{
+ if (GET_CODE (op) == TRUNCATE)
+ return arith_operand (op, mode);
+
+ if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))
+ return general_movsrc_operand (op, mode);
+
+ return nonimmediate_operand (op, mode);
+})
+
+;; Returns 1 if OP is a simple register address.
+(define_predicate "simple_mem_operand"
+ (and (match_code "mem")
+ (match_test "arith_reg_operand (XEXP (op, 0), SImode)")))
+
+;; Returns 1 if OP is a valid displacement address.
+(define_predicate "displacement_mem_operand"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PLUS")
+ (match_test "arith_reg_operand (XEXP (XEXP (op, 0), 0), SImode)")
+ (match_test "sh_legitimate_index_p (GET_MODE (op),
+ XEXP (XEXP (op, 0), 1),
+ TARGET_SH2A, true)")))
+
+;; Returns true if OP is a displacement address that can fit into a
+;; 16 bit (non-SH2A) memory load / store insn.
+(define_predicate "short_displacement_mem_operand"
+ (match_test "sh_disp_addr_displacement (op)
+ <= sh_max_mov_insn_displacement (GET_MODE (op), false)"))
+
+;; Returns 1 if the operand can be used in an SH2A movu.{b|w} insn.
+(define_predicate "zero_extend_movu_operand"
+ (and (match_operand 0 "displacement_mem_operand")
+ (match_test "GET_MODE (op) == QImode || GET_MODE (op) == HImode")))
+
+;; Returns 1 if the operand can be used in a zero_extend.
+(define_predicate "zero_extend_operand"
+ (ior (and (match_test "TARGET_SHMEDIA")
+ (match_operand 0 "general_extend_operand"))
+ (and (match_test "! TARGET_SHMEDIA")
+ (match_operand 0 "arith_reg_operand"))
+ (and (match_test "TARGET_SH2A")
+ (match_operand 0 "zero_extend_movu_operand"))))
+
+;; Returns 1 if OP can be the source of a simple move operation.  Same as
+;; general_operand, but a LABEL_REF is valid, PRE_DEC is invalid, as
+;; are subregs of system registers.
+(define_predicate "general_movsrc_operand"
+ (match_code "subreg,reg,const_int,const_double,mem,symbol_ref,label_ref,
+ const,const_vector")
+{
+ if (t_reg_operand (op, mode))
+ return 0;
+
+  /* Disallow PC relative QImode loads, since there is no insn to do that
+ and an imm8 load should be used instead. */
+ if (IS_PC_RELATIVE_LOAD_ADDR_P (op) && GET_MODE (op) == QImode)
+ return false;
+
+ if (MEM_P (op))
+ {
+ rtx inside = XEXP (op, 0);
+
+ /* Disallow mems with GBR address here. They have to go through
+ separate special patterns. */
+ if ((REG_P (inside) && REGNO (inside) == GBR_REG)
+ || (GET_CODE (inside) == PLUS && REG_P (XEXP (inside, 0))
+ && REGNO (XEXP (inside, 0)) == GBR_REG))
+ return 0;
+
+ if (GET_CODE (inside) == CONST)
+ inside = XEXP (inside, 0);
+
+ if (GET_CODE (inside) == LABEL_REF)
+ return 1;
+
+ if (GET_CODE (inside) == PLUS
+ && GET_CODE (XEXP (inside, 0)) == LABEL_REF
+ && CONST_INT_P (XEXP (inside, 1)))
+ return 1;
+
+ /* Only post inc allowed. */
+ if (GET_CODE (inside) == PRE_DEC)
+ return 0;
+ }
+
+ if (mode == GET_MODE (op)
+ && (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))))
+ {
+ rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op);
+ rtx x = XEXP (mem_rtx, 0);
+
+ if ((mode == QImode || mode == HImode)
+ && GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && CONST_INT_P (XEXP (x, 1)))
+ return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
+
+ /* Allow reg+reg addressing here without validating the register
+ numbers. Usually one of the regs must be R0 or a pseudo reg.
+ In some cases it can happen that arguments from hard regs are
+	 propagated directly into address expressions.  In these cases reload
+ will have to fix it up later. However, allow this only for native
+ 1, 2 or 4 byte addresses. */
+ if (can_create_pseudo_p () && GET_CODE (x) == PLUS
+ && GET_MODE_SIZE (mode) <= 4
+ && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
+ return true;
+
+ /* 'general_operand' does not allow volatile mems during RTL expansion to
+ avoid matching arithmetic that operates on mems, it seems.
+ On SH this leads to redundant sign extensions for QImode or HImode
+ loads. Thus we mimic the behavior but allow volatile mems. */
+ if (memory_address_addr_space_p (GET_MODE (mem_rtx), x,
+ MEM_ADDR_SPACE (mem_rtx)))
+ return true;
+ }
+
+ if (TARGET_SHMEDIA
+ && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR)
+ && sh_rep_vec (op, mode))
+ return 1;
+ if (TARGET_SHMEDIA && 1
+ && GET_CODE (op) == SUBREG && GET_MODE (op) == mode
+ && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
+ /* FIXME */ abort (); /* return 1; */
+
+ return general_operand (op, mode);
+})
+
+;; Returns 1 if OP is a MEM that does not use displacement addressing.
+(define_predicate "movsrc_no_disp_mem_operand"
+ (match_code "mem")
+{
+ return general_movsrc_operand (op, mode) && satisfies_constraint_Snd (op);
+})
+
+;; Returns 1 if OP can be a destination of a move. Same as
+;; general_operand, but no preinc allowed.
+(define_predicate "general_movdst_operand"
+ (match_code "subreg,reg,mem")
+{
+ if (t_reg_operand (op, mode))
+ return 0;
+
+ if (MEM_P (op))
+ {
+ rtx inside = XEXP (op, 0);
+ /* Disallow mems with GBR address here. They have to go through
+ separate special patterns. */
+ if ((REG_P (inside) && REGNO (inside) == GBR_REG)
+ || (GET_CODE (inside) == PLUS && REG_P (XEXP (inside, 0))
+ && REGNO (XEXP (inside, 0)) == GBR_REG))
+ return 0;
+ }
+
+ /* Only pre dec allowed. */
+ if (MEM_P (op) && GET_CODE (XEXP (op, 0)) == POST_INC)
+ return 0;
+ if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
+ && ! (reload_in_progress || reload_completed))
+ return 0;
+
+ if (mode == GET_MODE (op)
+ && (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))))
+ {
+ rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op);
+ rtx x = XEXP (mem_rtx, 0);
+
+ if ((mode == QImode || mode == HImode)
+ && GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
+ && CONST_INT_P (XEXP (x, 1)))
+ return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
+
+ /* Allow reg+reg addressing here without validating the register
+ numbers. Usually one of the regs must be R0 or a pseudo reg.
+ In some cases it can happen that arguments from hard regs are
+	 propagated directly into address expressions.  In these cases reload
+ will have to fix it up later. However, allow this only for native
+ 1, 2 or 4 byte addresses. */
+ if (can_create_pseudo_p () && GET_CODE (x) == PLUS
+ && GET_MODE_SIZE (mode) <= 4
+ && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
+ return true;
+
+ /* 'general_operand' does not allow volatile mems during RTL expansion to
+ avoid matching arithmetic that operates on mems, it seems.
+ On SH this leads to redundant sign extensions for QImode or HImode
+ stores. Thus we mimic the behavior but allow volatile mems. */
+ if (memory_address_addr_space_p (GET_MODE (mem_rtx), x,
+ MEM_ADDR_SPACE (mem_rtx)))
+ return true;
+ }
+
+ return general_operand (op, mode);
+})
+
+;; Returns 1 if OP is a POST_INC on stack pointer register.
+(define_predicate "sh_no_delay_pop_operand"
+ (match_code "mem")
+{
+ rtx inside;
+ inside = XEXP (op, 0);
+
+ if (GET_CODE (op) == MEM && GET_MODE (op) == SImode
+ && GET_CODE (inside) == POST_INC
+ && GET_CODE (XEXP (inside, 0)) == REG
+ && REGNO (XEXP (inside, 0)) == SP_REG)
+ return 1;
+
+ return 0;
+})
+
+;; Returns 1 if OP is a MEM that can be the source of a simple move operation.
+(define_predicate "unaligned_load_operand"
+ (match_code "mem")
+{
+ rtx inside;
+
+ if (!MEM_P (op) || GET_MODE (op) != mode)
+ return 0;
+
+ inside = XEXP (op, 0);
+
+ if (GET_CODE (inside) == POST_INC)
+ inside = XEXP (inside, 0);
+
+ if (REG_P (inside))
+ return 1;
+
+ return 0;
+})
+
+;; Returns 1 if OP is a MEM that can be used in "index_disp" combiner
+;; patterns.
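+;; The address shape being matched is, roughly,
+;;   (mem (plus (plus (mult (reg) (const_int N)) (reg)) (const_int disp)))
+;; where N is a power of two greater than 1 and disp is a legitimate
+;; displacement for the access mode.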
+(define_predicate "mem_index_disp_operand"
+ (match_code "mem")
+{
+ rtx plus0_rtx, plus1_rtx, mult_rtx;
+
+ plus0_rtx = XEXP (op, 0);
+ if (GET_CODE (plus0_rtx) != PLUS)
+ return 0;
+
+ plus1_rtx = XEXP (plus0_rtx, 0);
+ if (GET_CODE (plus1_rtx) != PLUS)
+ return 0;
+ if (! arith_reg_operand (XEXP (plus1_rtx, 1), GET_MODE (XEXP (plus1_rtx, 1))))
+ return 0;
+
+ mult_rtx = XEXP (plus1_rtx, 0);
+ if (GET_CODE (mult_rtx) != MULT)
+ return 0;
+ if (! arith_reg_operand (XEXP (mult_rtx, 0), GET_MODE (XEXP (mult_rtx, 0)))
+ || ! CONST_INT_P (XEXP (mult_rtx, 1)))
+ return 0;
+
+ return exact_log2 (INTVAL (XEXP (mult_rtx, 1))) > 0
+ && sh_legitimate_index_p (mode, XEXP (plus0_rtx, 1), TARGET_SH2A, true);
+})
+
+;; Returns true if OP is some kind of greater comparison.
+(define_predicate "greater_comparison_operator"
+ (match_code "gt,ge,gtu,geu"))
+
+;; Returns true if OP is an operand suitable for shmedia reload_inqi and
+;; reload_inhi insns.
+(define_predicate "inqhi_operand"
+ (match_code "truncate")
+{
+ if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
+ return 0;
+ op = XEXP (op, 0);
+ /* Can't use true_regnum here because copy_cost wants to know about
+ SECONDARY_INPUT_RELOAD_CLASS. */
+ return REG_P (op) && FP_REGISTER_P (REGNO (op));
+})
+
+;; Returns true if OP is a general purpose integer register.
+;; This predicate is currently unused.
+;;(define_special_predicate "int_gpr_dest"
+;; (match_code "subreg,reg")
+;;{
+;; enum machine_mode op_mode = GET_MODE (op);
+;;
+;; if (GET_MODE_CLASS (op_mode) != MODE_INT
+;; || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
+;; return 0;
+;; if (! reload_completed)
+;; return 0;
+;; return true_regnum (op) <= LAST_GENERAL_REG;
+;;})
+
+;; Returns true if OP is some kind of less comparison.
+(define_predicate "less_comparison_operator"
+ (match_code "lt,le,ltu,leu"))
+
+;; Returns 1 if OP is a valid source operand for a logical operation.
+(define_predicate "logical_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (TARGET_SHMEDIA
+ && mode != DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
+ return 0;
+
+ if (arith_reg_operand (op, mode))
+ return 1;
+
+ if (TARGET_SHMEDIA)
+ {
+ if (satisfies_constraint_I10 (op))
+ return 1;
+ else
+ return 0;
+ }
+ else if (satisfies_constraint_K08 (op))
+ return 1;
+
+ return 0;
+})
+
+;; Like logical_operand but allows additional constant values which can be
+;; done with zero extensions. Used for the second operand of and insns.
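+;; In practice the extra values are 0xff and 0xffff (constraints Jmb / Jmw),
+;; which can be done with extu.b / extu.w instead of a literal AND mask.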
+(define_predicate "logical_and_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (logical_operand (op, mode))
+ return 1;
+
+ if (! TARGET_SHMEDIA
+ && (satisfies_constraint_Jmb (op) || satisfies_constraint_Jmw (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Returns true if OP is a logical operator.
+(define_predicate "logical_operator"
+ (match_code "and,ior,xor"))
+
+;; Like arith_reg_operand, but for register source operands of narrow
+;; logical SHMEDIA operations: forbid subregs of DImode / TImode regs.
+(define_predicate "logical_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (TARGET_SHMEDIA
+ && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4
+ && mode != DImode)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+;; Returns true if OP is a valid bit offset value for the shmedia mextr insns.
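+;; That is, a multiple of 8 in the range 8 .. 56, i.e. a whole-byte bit
+;; offset within a 64 bit value, excluding 0.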
+(define_predicate "mextr_bit_offset"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT i;
+
+ if (!CONST_INT_P (op))
+ return 0;
+ i = INTVAL (op);
+ return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
+})
+
+;; Returns true if OP is a constant -1, 0 or a zero-extended register that
+;; can be used as an operand in the *subsi3_media insn.
+(define_predicate "minuend_operand"
+ (match_code "subreg,reg,truncate,const_int")
+{
+ return op == constm1_rtx || extend_reg_or_0_operand (op, mode);
+})
+
+;; Returns true if OP is a noncommutative floating point operator.
+;; This predicate is currently unused.
+;;(define_predicate "noncommutative_float_operator"
+;; (and (match_code "minus,div")
+;; (match_test "GET_MODE (op) == mode")))
+
+;; UNORDERED is only supported on SHMEDIA.
+
+(define_predicate "sh_float_comparison_operator"
+ (ior (match_operand 0 "ordered_comparison_operator")
+ (and (match_test "TARGET_SHMEDIA")
+ (match_code "unordered"))))
+
+(define_predicate "shmedia_cbranch_comparison_operator"
+ (ior (match_operand 0 "equality_comparison_operator")
+ (match_operand 0 "greater_comparison_operator")))
+
+;; Returns true if OP is a constant vector.
+(define_predicate "sh_const_vec"
+ (match_code "const_vector")
+{
+ int i;
+
+ if (GET_CODE (op) != CONST_VECTOR
+ || (GET_MODE (op) != mode && mode != VOIDmode))
+ return 0;
+ i = XVECLEN (op, 0) - 1;
+ for (; i >= 0; i--)
+ if (!CONST_INT_P (XVECEXP (op, 0, i)))
+ return 0;
+ return 1;
+})
+
+;; Determine if OP is a constant vector matching MODE in which only one
+;; element is significant and the remaining elements are merely its sign
+;; extension.  Two byte-sized elements count as one.
+(define_predicate "sh_1el_vec"
+ (match_code "const_vector")
+{
+ int unit_size;
+ int i, last, least, sign_ix;
+ rtx sign;
+
+ if (GET_CODE (op) != CONST_VECTOR
+ || (GET_MODE (op) != mode && mode != VOIDmode))
+ return 0;
+ /* Determine numbers of last and of least significant elements. */
+ last = XVECLEN (op, 0) - 1;
+ least = TARGET_LITTLE_ENDIAN ? 0 : last;
+ if (!CONST_INT_P (XVECEXP (op, 0, least)))
+ return 0;
+ sign_ix = least;
+ if (GET_MODE_UNIT_SIZE (mode) == 1)
+ sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
+ if (!CONST_INT_P (XVECEXP (op, 0, sign_ix)))
+ return 0;
+ unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
+ sign = (INTVAL (XVECEXP (op, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
+ ? constm1_rtx : const0_rtx);
+ i = XVECLEN (op, 0) - 1;
+ do
+ if (i != least && i != sign_ix && XVECEXP (op, 0, i) != sign)
+ return 0;
+ while (--i);
+ return 1;
+})
+
+;; Like register_operand, but take into account that SHMEDIA can use
+;; the constant zero like a general register.
+(define_predicate "sh_register_operand"
+ (match_code "reg,subreg,const_int,const_double")
+{
+ if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
+ return 1;
+ return register_operand (op, mode);
+})
+
+;; Returns true if OP is a vector which is composed of one element that is
+;; repeated.
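+;; For example, a vector like
+;;   (const_vector:V4HI [(const_int 5) (const_int 5) (const_int 5) (const_int 5)])
+;; qualifies, since every element is the same (shared) rtx.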
+(define_predicate "sh_rep_vec"
+ (match_code "const_vector,parallel")
+{
+ int i;
+ rtx x, y;
+
+ if ((GET_CODE (op) != CONST_VECTOR && GET_CODE (op) != PARALLEL)
+ || (GET_MODE (op) != mode && mode != VOIDmode))
+ return 0;
+ i = XVECLEN (op, 0) - 2;
+ x = XVECEXP (op, 0, i + 1);
+ if (GET_MODE_UNIT_SIZE (mode) == 1)
+ {
+ y = XVECEXP (op, 0, i);
+ for (i -= 2; i >= 0; i -= 2)
+ if (! rtx_equal_p (XVECEXP (op, 0, i + 1), x)
+ || ! rtx_equal_p (XVECEXP (op, 0, i), y))
+ return 0;
+ }
+ else
+ for (; i >= 0; i--)
+ if (XVECEXP (op, 0, i) != x)
+ return 0;
+ return 1;
+})
+
+;; Returns true if OP is a valid shift count operand for shift operations.
+(define_predicate "shift_count_operand"
+ (match_code "const_int,const_double,const,symbol_ref,label_ref,subreg,reg,
+ zero_extend,sign_extend")
+{
+ /* Allow T_REG as shift count for dynamic shifts, although it is not
+ really possible. It will then be copied to a general purpose reg. */
+ if (! TARGET_SHMEDIA)
+ return const_int_operand (op, mode) || arith_reg_operand (op, mode)
+ || (TARGET_DYNSHIFT && t_reg_operand (op, mode));
+
+ return (CONSTANT_P (op)
+ ? (CONST_INT_P (op)
+ ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
+ : nonmemory_operand (op, mode))
+ : shift_count_reg_operand (op, mode));
+})
+
+;; Returns true if OP is a valid shift count operand in a register which can
+;; be used by shmedia shift insns.
+(define_predicate "shift_count_reg_operand"
+ (match_code "subreg,reg,zero_extend,sign_extend")
+{
+ if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
+ || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0))
+ && (mode == VOIDmode || mode == GET_MODE (op))
+ && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
+ && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT)
+ {
+ mode = VOIDmode;
+ do
+ op = XEXP (op, 0);
+ while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
+ || GET_CODE (op) == TRUNCATE)
+ && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
+ && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT);
+
+ }
+ return arith_reg_operand (op, mode);
+})
+
+;; Predicates for matching operands that are constant shift
+;; amounts 1, 2, 8, 16.
+(define_predicate "p27_shift_count_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_P27 (op)")))
+
+(define_predicate "not_p27_shift_count_operand"
+ (and (match_code "const_int")
+ (match_test "! satisfies_constraint_P27 (op)")))
+
+;; For right shifts the constant 1 is a special case because the shlr insn
+;; clobbers the T_REG and is handled by the T_REG clobbering version of the
+;; insn, which is also used for non-P27 shift sequences.
+(define_predicate "p27_rshift_count_operand"
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_P27 (op)")
+ (match_test "! satisfies_constraint_M (op)")))
+
+(define_predicate "not_p27_rshift_count_operand"
+ (and (match_code "const_int")
+ (ior (match_test "! satisfies_constraint_P27 (op)")
+ (match_test "satisfies_constraint_M (op)"))))
+
+;; Returns true if OP is some kind of a shift operator.
+(define_predicate "shift_operator"
+ (match_code "ashift,ashiftrt,lshiftrt"))
+
+;; Returns true if OP is a symbol reference.
+(define_predicate "symbol_ref_operand"
+ (match_code "symbol_ref"))
+
+;; Same as target_reg_operand, except that label_refs and symbol_refs
+;; are accepted before reload.
+(define_special_predicate "target_operand"
+ (match_code "subreg,reg,label_ref,symbol_ref,const,unspec")
+{
+ if (mode != VOIDmode && mode != Pmode)
+ return 0;
+
+ if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode)
+ && satisfies_constraint_Csy (op))
+ return ! reload_completed;
+
+ return target_reg_operand (op, mode);
+})
+
+;; A predicate that accepts pseudos and branch target registers.
+(define_special_predicate "target_reg_operand"
+ (match_code "subreg,reg")
+{
+ if (mode == VOIDmode
+ ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode
+ : mode != GET_MODE (op))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = XEXP (op, 0);
+
+ if (!REG_P (op))
+ return 0;
+
+ /* We must protect ourselves from matching pseudos that are virtual
+     registers, because they will eventually be replaced with hardware
+ registers that aren't branch-target registers. */
+ if (REGNO (op) > LAST_VIRTUAL_REGISTER
+ || TARGET_REGISTER_P (REGNO (op)))
+ return 1;
+
+ return 0;
+})
+
+;; Returns true if OP is a valid operand for the shmedia mperm.w insn.
+(define_special_predicate "trunc_hi_operand"
+ (match_code "subreg,reg,truncate")
+{
+ enum machine_mode op_mode = GET_MODE (op);
+
+ if (op_mode != SImode && op_mode != DImode
+ && op_mode != V4HImode && op_mode != V2SImode)
+ return 0;
+ return extend_reg_operand (op, mode);
+})
+
+;; Returns true if OP is an address suitable for an unaligned access
+;; instruction.
+(define_special_predicate "ua_address_operand"
+ (match_code "subreg,reg,plus")
+{
+ if (GET_CODE (op) == PLUS
+ && (! satisfies_constraint_I06 (XEXP (op, 1))))
+ return 0;
+ return address_operand (op, QImode);
+})
+
+;; Returns true if OP is a valid offset for an unaligned memory address.
+(define_predicate "ua_offset"
+ (match_code "const_int")
+{
+ return satisfies_constraint_I06 (op);
+})
+
+;; Returns true if OP is a floating point operator with one operand.
+(define_predicate "unary_float_operator"
+ (and (match_code "abs,neg,sqrt")
+ (match_test "GET_MODE (op) == mode")))
+
+;; Return 1 if OP is a valid source operand for xor.
+(define_predicate "xor_operand"
+ (match_code "subreg,reg,const_int")
+{
+ if (CONST_INT_P (op))
+ return (TARGET_SHMEDIA
+ ? (satisfies_constraint_I06 (op)
+ || (!can_create_pseudo_p () && INTVAL (op) == 0xff))
+ : satisfies_constraint_K08 (op));
+ if (TARGET_SHMEDIA
+ && mode != DImode && GET_CODE (op) == SUBREG
+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
+ return 0;
+ return arith_reg_operand (op, mode);
+})
+
+(define_predicate "bitwise_memory_operand"
+ (match_code "mem")
+{
+ if (MEM_P (op))
+ {
+ if (REG_P (XEXP (op, 0)))
+ return 1;
+
+ if (GET_CODE (XEXP (op, 0)) == PLUS
+ && REG_P (XEXP (XEXP (op, 0), 0))
+ && satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1)))
+ return 1;
+ }
+ return 0;
+})
+
+;; The atomic_* operand predicates are used for the atomic patterns.
+;; Depending on the particular pattern some operands can be immediate
+;; values. Using these predicates avoids the usage of 'force_reg' in the
+;; expanders.
+(define_predicate "atomic_arith_operand"
+ (ior (match_code "subreg,reg")
+ (and (match_test "satisfies_constraint_I08 (op)")
+ (match_test "mode != QImode")
+ (match_test "mode != HImode")
+ (match_test "TARGET_SH4A_ARCH"))))
+
+(define_predicate "atomic_logical_operand"
+ (ior (match_code "subreg,reg")
+ (and (match_test "satisfies_constraint_K08 (op)")
+ (match_test "mode != QImode")
+ (match_test "mode != HImode")
+ (match_test "TARGET_SH4A_ARCH"))))
+
+;; A predicate describing the T bit register in any form.
+(define_predicate "t_reg_operand"
+ (match_code "reg,subreg,sign_extend,zero_extend")
+{
+ switch (GET_CODE (op))
+ {
+ case REG:
+ return REGNO (op) == T_REG;
+
+ case SUBREG:
+ return REG_P (SUBREG_REG (op)) && REGNO (SUBREG_REG (op)) == T_REG;
+
+ case ZERO_EXTEND:
+ case SIGN_EXTEND:
+ return GET_CODE (XEXP (op, 0)) == SUBREG
+ && REG_P (SUBREG_REG (XEXP (op, 0)))
+ && REGNO (SUBREG_REG (XEXP (op, 0))) == T_REG;
+
+ default:
+ return 0;
+ }
+})
+
+;; A predicate describing a negated T bit register.
+(define_predicate "negt_reg_operand"
+ (match_code "subreg,xor")
+{
+ switch (GET_CODE (op))
+ {
+ case XOR:
+ return t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))
+ && satisfies_constraint_M (XEXP (op, 1));
+
+ case SUBREG:
+ return negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)));
+
+ default:
+ return 0;
+ }
+})
+
+;; A predicate that returns true if OP is a valid construct around the T bit
+;; that can be used as an operand for conditional branches.
+(define_predicate "cbranch_treg_value"
+ (match_code "eq,ne,reg,subreg,xor,sign_extend,zero_extend")
+{
+ return sh_eval_treg_value (op) >= 0;
+})
+
+;; Returns true if OP is arith_reg_operand or t_reg_operand.
+(define_predicate "arith_reg_or_t_reg_operand"
+ (ior (match_operand 0 "arith_reg_operand")
+ (match_operand 0 "t_reg_operand")))
+
+;; A predicate describing the negated value of the T bit register shifted
+;; left by 31.
+(define_predicate "negt_reg_shl31_operand"
+ (match_code "plus,minus,if_then_else")
+{
+ /* (plus:SI (mult:SI (match_operand:SI 1 "t_reg_operand")
+ (const_int -2147483648)) ;; 0xffffffff80000000
+ (const_int -2147483648))
+ */
+ if (GET_CODE (op) == PLUS && satisfies_constraint_Jhb (XEXP (op, 1))
+ && GET_CODE (XEXP (op, 0)) == MULT
+ && t_reg_operand (XEXP (XEXP (op, 0), 0), SImode)
+ && satisfies_constraint_Jhb (XEXP (XEXP (op, 0), 1)))
+ return true;
+
+ /* (minus:SI (const_int -2147483648) ;; 0xffffffff80000000
+ (mult:SI (match_operand:SI 1 "t_reg_operand")
+ (const_int -2147483648)))
+ */
+ if (GET_CODE (op) == MINUS
+ && satisfies_constraint_Jhb (XEXP (op, 0))
+ && GET_CODE (XEXP (op, 1)) == MULT
+ && t_reg_operand (XEXP (XEXP (op, 1), 0), SImode)
+ && satisfies_constraint_Jhb (XEXP (XEXP (op, 1), 1)))
+ return true;
+
+ /* (if_then_else:SI (match_operand:SI 1 "t_reg_operand")
+ (const_int 0)
+ (const_int -2147483648)) ;; 0xffffffff80000000
+ */
+ if (GET_CODE (op) == IF_THEN_ELSE && t_reg_operand (XEXP (op, 0), SImode)
+ && satisfies_constraint_Z (XEXP (op, 1))
+ && satisfies_constraint_Jhb (XEXP (op, 2)))
+ return true;
+
+ return false;
+})
+
+;; A predicate that determines whether a given constant is a valid
+;; displacement for a GBR load/store of the specified mode.
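+;; For example, QImode allows 0..255, HImode 0..510 and SImode 0..1020
+;; (255 scaled by the access size).  DImode is moved as two SImode accesses,
+;; so its limit drops by 4 to keep the second access in range.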
+(define_predicate "gbr_displacement"
+ (match_code "const_int")
+{
+ const int mode_sz = GET_MODE_SIZE (mode);
+ const int move_sz = mode_sz > GET_MODE_SIZE (SImode)
+ ? GET_MODE_SIZE (SImode)
+ : mode_sz;
+ int max_disp = 255 * move_sz;
+ if (mode_sz > move_sz)
+ max_disp -= mode_sz - move_sz;
+
+ return INTVAL (op) >= 0 && INTVAL (op) <= max_disp;
+})
+
+;; A predicate that determines whether OP is a valid GBR addressing mode
+;; memory reference.
+(define_predicate "gbr_address_mem"
+ (match_code "mem")
+{
+ rtx addr = XEXP (op, 0);
+
+ if (REG_P (addr) && REGNO (addr) == GBR_REG)
+ return true;
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0)) && REGNO (XEXP (addr, 0)) == GBR_REG
+ && gbr_displacement (XEXP (addr, 1), mode))
+ return true;
+
+ return false;
+})
diff --git a/gcc-4.9/gcc/config/sh/rtems.h b/gcc-4.9/gcc/config/sh/rtems.h
new file mode 100644
index 000000000..bbedc5b3a
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/rtems.h
@@ -0,0 +1,26 @@
+/* Definitions for rtems targeting a SH using COFF.
+ Copyright (C) 1997-2014 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() do { \
+ builtin_define( "__rtems__" ); \
+ builtin_assert( "system=rtems" ); \
+} while (0)
diff --git a/gcc-4.9/gcc/config/sh/rtemself.h b/gcc-4.9/gcc/config/sh/rtemself.h
new file mode 100644
index 000000000..25d8b27c4
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/rtemself.h
@@ -0,0 +1,26 @@
+/* Definitions for rtems targeting a SH using elf.
+ Copyright (C) 1997-2014 Free Software Foundation, Inc.
+ Contributed by Joel Sherrill (joel@OARcorp.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Specify predefined symbols in preprocessor. */
+
+#define TARGET_OS_CPP_BUILTINS() do { \
+ builtin_define( "__rtems__" ); \
+ builtin_assert( "system=rtems" ); \
+} while (0)
diff --git a/gcc-4.9/gcc/config/sh/sh-c.c b/gcc-4.9/gcc/config/sh/sh-c.c
new file mode 100644
index 000000000..43ff7ad22
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh-c.c
@@ -0,0 +1,148 @@
+/* Pragma handling for GCC for Renesas / SuperH SH.
+ Copyright (C) 1993-2014 Free Software Foundation, Inc.
+ Contributed by Joern Rennecke <joern.rennecke@st.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "tm_p.h"
+#include "cpplib.h"
+#include "c-family/c-common.h"
+#include "target.h"
+
+/* Handle machine specific pragmas to be semi-compatible with the Renesas
+ compiler. */
+
+/* Add ATTR to the attributes of the current function. If there is no
+ such function, save it to be added to the attributes of the next
+ function. */
+static void
+sh_add_function_attribute (const char *attr)
+{
+ tree id = get_identifier (attr);
+
+ if (current_function_decl)
+ decl_attributes (&current_function_decl,
+ tree_cons (id, NULL_TREE, NULL_TREE), 0);
+ else
+ {
+ *sh_deferred_function_attributes_tail
+ = tree_cons (id, NULL_TREE, *sh_deferred_function_attributes_tail);
+ sh_deferred_function_attributes_tail
+ = &TREE_CHAIN (*sh_deferred_function_attributes_tail);
+ }
+}
+
+void
+sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ sh_add_function_attribute ("interrupt_handler");
+}
+
+void
+sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ sh_add_function_attribute ("trapa_handler");
+}
+
+void
+sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
+{
+ sh_add_function_attribute ("nosave_low_regs");
+}
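+
+/* As a rough sketch of how these are meant to be used from C code (the
+   function name below is made up):
+
+     #pragma interrupt
+     void my_isr (void);
+
+   marks the following function with the interrupt_handler attribute, much
+   like writing __attribute__ ((interrupt_handler)) directly.  */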
+
+#define builtin_define(TXT) cpp_define (pfile, TXT)
+#define builtin_assert(TXT) cpp_assert (pfile, TXT)
+
+/* Implement the TARGET_CPU_CPP_BUILTINS macro.  */
+void
+sh_cpu_cpp_builtins (cpp_reader* pfile)
+{
+ builtin_define ("__sh__");
+ builtin_assert ("cpu=sh");
+ builtin_assert ("machine=sh");
+ switch ((int) sh_cpu)
+ {
+ case PROCESSOR_SH1:
+ builtin_define ("__sh1__");
+ builtin_define ("__SH1__");
+ break;
+ case PROCESSOR_SH2:
+ builtin_define ("__sh2__");
+ builtin_define ("__SH2__");
+ break;
+ case PROCESSOR_SH2E:
+ builtin_define ("__SH2E__");
+ break;
+ case PROCESSOR_SH2A:
+ builtin_define ("__SH2A__");
+ if (TARGET_SH2A_DOUBLE)
+ builtin_define (TARGET_FPU_SINGLE
+ ? "__SH2A_SINGLE__" : "__SH2A_DOUBLE__");
+ else
+ builtin_define (TARGET_FPU_ANY
+ ? "__SH2A_SINGLE_ONLY__" : "__SH2A_NOFPU__");
+ break;
+ case PROCESSOR_SH3:
+ builtin_define ("__sh3__");
+ builtin_define ("__SH3__");
+ if (TARGET_HARD_SH4)
+ builtin_define ("__SH4_NOFPU__");
+ break;
+ case PROCESSOR_SH3E:
+ builtin_define (TARGET_HARD_SH4 ? "__SH4_SINGLE_ONLY__" : "__SH3E__");
+ break;
+ case PROCESSOR_SH4:
+ builtin_define (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__");
+ break;
+    case PROCESSOR_SH4A:
+ builtin_define ("__SH4A__");
+ builtin_define (TARGET_SH4
+ ? (TARGET_FPU_SINGLE ? "__SH4_SINGLE__" : "__SH4__")
+ : TARGET_FPU_ANY ? "__SH4_SINGLE_ONLY__"
+ : "__SH4_NOFPU__");
+ break;
+ case PROCESSOR_SH5:
+ {
+ builtin_define_with_value ("__SH5__",
+ TARGET_SHMEDIA64 ? "64" : "32", 0);
+ builtin_define_with_value ("__SHMEDIA__",
+ TARGET_SHMEDIA ? "1" : "0", 0);
+ if (! TARGET_FPU_DOUBLE)
+ builtin_define ("__SH4_NOFPU__");
+ }
+ }
+ if (TARGET_FPU_ANY)
+ builtin_define ("__SH_FPU_ANY__");
+ if (TARGET_FPU_DOUBLE)
+ builtin_define ("__SH_FPU_DOUBLE__");
+ if (TARGET_HITACHI)
+ builtin_define ("__HITACHI__");
+ if (TARGET_FMOVD)
+ builtin_define ("__FMOVD_ENABLED__");
+ builtin_define (TARGET_LITTLE_ENDIAN
+ ? "__LITTLE_ENDIAN__" : "__BIG_ENDIAN__");
+
+ cpp_define_formatted (pfile, "__SH_ATOMIC_MODEL_%s__",
+ selected_atomic_model ().cdef_name);
+}
diff --git a/gcc-4.9/gcc/config/sh/sh-mem.cc b/gcc-4.9/gcc/config/sh/sh-mem.cc
new file mode 100644
index 000000000..45af23acb
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh-mem.cc
@@ -0,0 +1,610 @@
+/* Helper routines for memory move and comparison insns.
+ Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "machmode.h"
+#include "rtl.h"
+#include "tree.h"
+#include "expr.h"
+#include "tm_p.h"
+#include "basic-block.h"
+
+/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
+static void
+force_into (rtx value, rtx target)
+{
+ value = force_operand (value, target);
+ if (! rtx_equal_p (value, target))
+ emit_insn (gen_move_insn (target, value));
+}
+
+/* Emit code to perform a block move. Choose the best method.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the source.
+ OPERANDS[2] is the size.
+ OPERANDS[3] is the alignment safe to use. */
+bool
+expand_block_move (rtx *operands)
+{
+ int align = INTVAL (operands[3]);
+ int constp = (CONST_INT_P (operands[2]));
+ int bytes = (constp ? INTVAL (operands[2]) : 0);
+
+ if (! constp)
+ return false;
+
+ /* If we could use mov.l to move words and dest is word-aligned, we
+ can use movua.l for loads and still generate a relatively short
+ and efficient sequence. */
+ if (TARGET_SH4A_ARCH && align < 4
+ && MEM_ALIGN (operands[0]) >= 32
+ && can_move_by_pieces (bytes, 32))
+ {
+ rtx dest = copy_rtx (operands[0]);
+ rtx src = copy_rtx (operands[1]);
+ /* We could use different pseudos for each copied word, but
+ since movua can only load into r0, it's kind of
+ pointless. */
+ rtx temp = gen_reg_rtx (SImode);
+ rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
+ int copied = 0;
+
+ while (copied + 4 <= bytes)
+ {
+ rtx to = adjust_address (dest, SImode, copied);
+ rtx from = adjust_automodify_address (src, BLKmode,
+ src_addr, copied);
+
+ set_mem_size (from, 4);
+ emit_insn (gen_movua (temp, from));
+ emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
+ emit_move_insn (to, temp);
+ copied += 4;
+ }
+
+ if (copied < bytes)
+ move_by_pieces (adjust_address (dest, BLKmode, copied),
+ adjust_automodify_address (src, BLKmode,
+ src_addr, copied),
+ bytes - copied, align, 0);
+
+ return true;
+ }
+
+ /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
+ alignment, or if it isn't a multiple of 4 bytes, then fail. */
+ if (align < 4 || (bytes % 4 != 0))
+ return false;
+
+ if (TARGET_HARD_SH4)
+ {
+ if (bytes < 12)
+ return false;
+ else if (bytes == 12)
+ {
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+
+ function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ return true;
+ }
+ else if (! optimize_size)
+ {
+ const char *entry_name;
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ int dwords;
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+ rtx r6 = gen_rtx_REG (SImode, 6);
+
+ entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
+ function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+
+ dwords = bytes >> 3;
+ emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ return true;
+ }
+ else
+ return false;
+ }
+ if (bytes < 64)
+ {
+ char entry[30];
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+
+ sprintf (entry, "__movmemSI%d", bytes);
+ function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+ emit_insn (gen_block_move_real (func_addr_rtx));
+ return true;
+ }
+
+  /* This is the same number of bytes as a memcpy call, but to a different,
+     less common function name, so this will occasionally use more space.  */
+ if (! optimize_size)
+ {
+ rtx func_addr_rtx = gen_reg_rtx (Pmode);
+ int final_switch, while_loop;
+ rtx r4 = gen_rtx_REG (SImode, 4);
+ rtx r5 = gen_rtx_REG (SImode, 5);
+ rtx r6 = gen_rtx_REG (SImode, 6);
+
+ function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+
+ /* r6 controls the size of the move. 16 is decremented from it
+ for each 64 bytes moved. Then the negative bit left over is used
+ as an index into a list of move instructions. e.g., a 72 byte move
+ would be set up with size(r6) = 14, for one iteration through the
+ big while loop, and a switch of -2 for the last part. */
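+      /* As another illustrative data point: a 136 byte move is 34 words,
+	 giving final_switch = 14 and while_loop = 16, so r6 starts at 30:
+	 two trips through the big loop and a final switch of -2 for the
+	 remaining 8 bytes.  */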
+
+ final_switch = 16 - ((bytes / 4) % 16);
+ while_loop = ((bytes / 4) / 16 - 1) * 16;
+ emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
+ emit_insn (gen_block_lump_real (func_addr_rtx));
+ return true;
+ }
+
+ return false;
+}
+
+static int prob_unlikely = REG_BR_PROB_BASE / 10;
+static int prob_likely = REG_BR_PROB_BASE / 4;
+
+/* Emit code to perform a strcmp.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the first string.
+ OPERANDS[2] is the second string.
+ OPERANDS[3] is the known alignment. */
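+/* The generated sequence compares the strings a longword at a time while
+   both addresses are known to be 4 byte aligned, using cmp/str against zero
+   to spot a possible zero byte, and falls back to a byte loop for anything
+   left over (a sketch of the idea, not a guarantee of the exact code).  */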
+bool
+sh_expand_cmpstr (rtx *operands)
+{
+ rtx addr1 = operands[1];
+ rtx addr2 = operands[2];
+ rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
+ rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+ rtx tmp2 = gen_reg_rtx (SImode);
+ rtx tmp3 = gen_reg_rtx (SImode);
+
+ rtx jump;
+ rtx L_return = gen_label_rtx ();
+ rtx L_loop_byte = gen_label_rtx ();
+ rtx L_end_loop_byte = gen_label_rtx ();
+ rtx L_loop_long = gen_label_rtx ();
+ rtx L_end_loop_long = gen_label_rtx ();
+
+ int align = INTVAL (operands[3]);
+
+ emit_move_insn (tmp0, const0_rtx);
+
+ if (align < 4)
+ {
+ emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
+ emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ }
+
+ addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
+
+ /* tmp2 is aligned, OK to load. */
+ emit_move_insn (tmp3, addr2);
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
+
+  /* Start long loop.  */
+ emit_label (L_loop_long);
+
+ emit_move_insn (tmp2, tmp3);
+
+ /* tmp1 is aligned, OK to load. */
+ emit_move_insn (tmp1, addr1);
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
+
+  /* Is there a 0 byte?  */
+ emit_insn (gen_andsi3 (tmp3, tmp3, tmp1));
+
+ emit_insn (gen_cmpstr_t (tmp0, tmp3));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+
+ /* tmp2 is aligned, OK to load. */
+ emit_move_insn (tmp3, addr2);
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
+
+ jump = emit_jump_insn (gen_branch_true (L_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* end loop. */
+
+  /* Fall through, subtract words.  */
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ rtx low_1 = gen_lowpart (HImode, tmp1);
+ rtx low_2 = gen_lowpart (HImode, tmp2);
+
+ emit_insn (gen_rotlhi3_8 (low_1, low_1));
+ emit_insn (gen_rotlhi3_8 (low_2, low_2));
+ emit_insn (gen_rotlsi3_16 (tmp1, tmp1));
+ emit_insn (gen_rotlsi3_16 (tmp2, tmp2));
+ emit_insn (gen_rotlhi3_8 (low_1, low_1));
+ emit_insn (gen_rotlhi3_8 (low_2, low_2));
+ }
+
+ jump = emit_jump_insn (gen_jump_compact (L_return));
+ emit_barrier_after (jump);
+
+ emit_label (L_end_loop_long);
+
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
+
+ /* start byte loop. */
+ addr1 = adjust_address (addr1, QImode, 0);
+ addr2 = adjust_address (addr2, QImode, 0);
+
+ emit_label (L_loop_byte);
+
+ emit_insn (gen_extendqisi2 (tmp2, addr2));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
+
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
+
+ emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ if (flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ jump = emit_jump_insn (gen_branch_true (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* end loop. */
+
+ emit_label (L_end_loop_byte);
+
+ if (! flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
+
+ emit_label (L_return);
+
+ emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
+
+ return true;
+}
+
+/* Emit code to perform a strncmp.
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the first string.
+ OPERANDS[2] is the second string.
+ OPERANDS[3] is the length.
+ OPERANDS[4] is the known alignment. */
+bool
+sh_expand_cmpnstr (rtx *operands)
+{
+ rtx addr1 = operands[1];
+ rtx addr2 = operands[2];
+ rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
+ rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
+ rtx tmp1 = gen_reg_rtx (SImode);
+ rtx tmp2 = gen_reg_rtx (SImode);
+
+ rtx jump;
+ rtx L_return = gen_label_rtx ();
+ rtx L_loop_byte = gen_label_rtx ();
+ rtx L_end_loop_byte = gen_label_rtx ();
+
+ rtx len = force_reg (SImode, operands[3]);
+ int constp = CONST_INT_P (operands[3]);
+
+ /* Loop on a register count. */
+ if (constp)
+ {
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp3 = gen_reg_rtx (SImode);
+ rtx lenw = gen_reg_rtx (SImode);
+
+ rtx L_loop_long = gen_label_rtx ();
+ rtx L_end_loop_long = gen_label_rtx ();
+
+ int align = INTVAL (operands[4]);
+ int bytes = INTVAL (operands[3]);
+ int witers = bytes / 4;
+
+ if (witers > 1)
+ {
+ addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
+
+ emit_move_insn (tmp0, const0_rtx);
+
+ if (align < 4)
+ {
+ emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
+ emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ }
+
+	  /* Word count.  Do we have iterations?  */
+ emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
+
+	  /* Start long loop.  */
+ emit_label (L_loop_long);
+
+ /* tmp2 is aligned, OK to load. */
+ emit_move_insn (tmp2, addr2);
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
+ GET_MODE_SIZE (SImode)));
+
+ /* tmp1 is aligned, OK to load. */
+ emit_move_insn (tmp1, addr1);
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
+ GET_MODE_SIZE (SImode)));
+
+	  /* Is there a 0 byte?  */
+ emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
+
+ emit_insn (gen_cmpstr_t (tmp0, tmp3));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ if (TARGET_SH2)
+ emit_insn (gen_dect (lenw, lenw));
+ else
+ {
+ emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
+ emit_insn (gen_tstsi_t (lenw, lenw));
+ }
+
+ jump = emit_jump_insn (gen_branch_false (L_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+
+ int sbytes = bytes % 4;
+
+ /* end loop. Reached max iterations. */
+ if (! sbytes)
+ {
+ jump = emit_jump_insn (gen_jump_compact (L_return));
+ emit_barrier_after (jump);
+ }
+ else
+ {
+ /* Remaining bytes to check. */
+
+ addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
+
+ while (sbytes--)
+ {
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_insn (gen_extendqisi2 (tmp2, addr2));
+
+ emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ if (flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2,
+ gen_lowpart (QImode,
+ tmp2)));
+ jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ addr1 = adjust_address (addr1, QImode,
+ GET_MODE_SIZE (QImode));
+ addr2 = adjust_address (addr2, QImode,
+ GET_MODE_SIZE (QImode));
+ }
+
+ jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
+ emit_barrier_after (jump);
+ }
+
+ emit_label (L_end_loop_long);
+
+      /* Found the last word.  Recheck it byte by byte.  */
+
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
+ -GET_MODE_SIZE (SImode)));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
+ -GET_MODE_SIZE (SImode)));
+
+      /* Fall through.  */
+ }
+
+ addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
+
+ while (bytes--)
+ {
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_insn (gen_extendqisi2 (tmp2, addr2));
+
+ emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ if (flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2,
+ gen_lowpart (QImode, tmp2)));
+ jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
+ addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
+ }
+
+ jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
+ emit_barrier_after (jump);
+ }
+
+ addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
+
+ emit_label (L_loop_byte);
+
+ emit_insn (gen_extendqisi2 (tmp2, addr2));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 1));
+
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 1));
+
+ emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ if (flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ if (TARGET_SH2)
+ emit_insn (gen_dect (len, len));
+ else
+ {
+ emit_insn (gen_addsi3 (len, len, GEN_INT (-1)));
+ emit_insn (gen_tstsi_t (len, len));
+ }
+
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* end byte loop. */
+
+ emit_label (L_end_loop_byte);
+
+ if (! flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ emit_insn (gen_zero_extendqisi2 (tmp1, gen_lowpart (QImode, tmp1)));
+
+ emit_label (L_return);
+
+ emit_insn (gen_subsi3 (operands[0], tmp1, tmp2));
+
+ return true;
+}
+
+/* Emit code to perform a strlen
+
+ OPERANDS[0] is the destination.
+ OPERANDS[1] is the string.
+   OPERANDS[2] is the char to search for.
+ OPERANDS[3] is the alignment. */
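+/* The expansion scans a longword at a time once the address is 4 byte
+   aligned, using cmp/str to detect a byte equal to the searched value, then
+   backs up and pins down the exact end of the string with a byte loop
+   (rough outline only).  */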
+bool
+sh_expand_strlen (rtx *operands)
+{
+ rtx addr1 = operands[1];
+ rtx current_addr = copy_addr_to_reg (XEXP (addr1, 0));
+ rtx start_addr = gen_reg_rtx (Pmode);
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+ rtx L_return = gen_label_rtx ();
+ rtx L_loop_byte = gen_label_rtx ();
+
+ rtx jump;
+ rtx L_loop_long = gen_label_rtx ();
+ rtx L_end_loop_long = gen_label_rtx ();
+
+ int align = INTVAL (operands[3]);
+
+ emit_move_insn (operands[0], GEN_INT (-1));
+
+  /* Remember the start of the string.  */
+ emit_move_insn (start_addr, current_addr);
+
+ if (align < 4)
+ {
+ emit_insn (gen_tstsi_t (GEN_INT (3), current_addr));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ }
+
+ emit_move_insn (tmp0, operands[2]);
+
+ addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0);
+
+  /* Start long loop.  */
+ emit_label (L_loop_long);
+
+ /* tmp1 is aligned, OK to load. */
+ emit_move_insn (tmp1, addr1);
+ emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 4));
+
+  /* Is there a 0 byte?  */
+ emit_insn (gen_cmpstr_t (tmp0, tmp1));
+
+ jump = emit_jump_insn (gen_branch_false (L_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ /* end loop. */
+
+ emit_label (L_end_loop_long);
+
+ emit_move_insn (current_addr, plus_constant (Pmode, current_addr, -4));
+
+ /* start byte loop. */
+ addr1 = adjust_address (addr1, QImode, 0);
+
+ emit_label (L_loop_byte);
+
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1));
+
+ emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+
+ /* end loop. */
+
+ emit_label (L_return);
+
+ emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1)));
+
+ emit_insn (gen_subsi3 (operands[0], current_addr, start_addr));
+
+ return true;
+}
diff --git a/gcc-4.9/gcc/config/sh/sh-modes.def b/gcc-4.9/gcc/config/sh/sh-modes.def
new file mode 100644
index 000000000..3aa3046e3
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh-modes.def
@@ -0,0 +1,34 @@
+/* SH extra machine modes.
+ Copyright (C) 2003-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* The SH uses a partial integer mode to represent the FPSCR register. */
+PARTIAL_INT_MODE (SI, 22, PSI);
+/* PDI mode is used to represent a function address in a target register. */
+PARTIAL_INT_MODE (DI, 64, PDI);
+
+/* Vector modes. */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
+VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
diff --git a/gcc-4.9/gcc/config/sh/sh-protos.h b/gcc-4.9/gcc/config/sh/sh-protos.h
new file mode 100644
index 000000000..defc76a32
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh-protos.h
@@ -0,0 +1,235 @@
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH.
+ Copyright (C) 1993-2014 Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SH_PROTOS_H
+#define GCC_SH_PROTOS_H
+
+enum sh_function_kind {
+ /* A function with normal C ABI */
+ FUNCTION_ORDINARY,
+ /* A special function that guarantees that some otherwise call-clobbered
+ registers are not clobbered. These can't go through the SH5 resolver,
+ because it only saves argument passing registers. */
+ SFUNC_GOT,
+ /* A special function that should be linked statically. These are typically
+ smaller or not much larger than a PLT entry.
+ Some also have a non-standard ABI which precludes dynamic linking. */
+ SFUNC_STATIC
+};
+
+/* Atomic model. */
+struct sh_atomic_model
+{
+ enum enum_type
+ {
+ none = 0,
+ soft_gusa,
+ hard_llcs,
+ soft_tcb,
+ soft_imask,
+
+ num_models
+ };
+
+ /* If strict is set, disallow mixing of different models, as it would
+ happen on SH4A. */
+ bool strict;
+ enum_type type;
+
+ /* Name string as it was specified on the command line. */
+ const char* name;
+
+ /* Name string as it is used in C/C++ defines. */
+ const char* cdef_name;
+
+ /* GBR offset variable for TCB model. */
+ int tcb_gbr_offset;
+};
+
+extern const sh_atomic_model& selected_atomic_model (void);
+
+/* Shortcuts to check the currently selected atomic model. */
+#define TARGET_ATOMIC_ANY \
+ selected_atomic_model ().type != sh_atomic_model::none
+
+#define TARGET_ATOMIC_STRICT \
+ selected_atomic_model ().strict
+
+#define TARGET_ATOMIC_SOFT_GUSA \
+ selected_atomic_model ().type == sh_atomic_model::soft_gusa
+
+#define TARGET_ATOMIC_HARD_LLCS \
+ selected_atomic_model ().type == sh_atomic_model::hard_llcs
+
+#define TARGET_ATOMIC_SOFT_TCB \
+ selected_atomic_model ().type == sh_atomic_model::soft_tcb
+
+#define TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX \
+ GEN_INT (selected_atomic_model ().tcb_gbr_offset)
+
+#define TARGET_ATOMIC_SOFT_IMASK \
+ selected_atomic_model ().type == sh_atomic_model::soft_imask
+
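+/* These are typically tested in the atomic model expanders; a sketch
+   (not a quote from sync.md; "gbr_offset" is a made-up name) would be
+
+     if (TARGET_ATOMIC_SOFT_TCB)
+       gbr_offset = TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX;
+
+   to pick up the configured GBR offset for the thread control block.  */
+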
+#ifdef RTX_CODE
+extern rtx sh_fsca_sf2int (void);
+extern rtx sh_fsca_int2sf (void);
+
+/* Declare functions defined in sh.c and used in templates. */
+
+extern const char *output_branch (int, rtx, rtx *);
+extern const char *output_ieee_ccmpeq (rtx, rtx *);
+extern const char *output_branchy_insn (enum rtx_code, const char *, rtx, rtx *);
+extern const char *output_movedouble (rtx, rtx[], enum machine_mode);
+extern const char *output_movepcrel (rtx, rtx[], enum machine_mode);
+extern const char *output_far_jump (rtx, rtx);
+
+extern rtx sfunc_uses_reg (rtx);
+extern int barrier_align (rtx);
+extern int sh_loop_align (rtx);
+extern bool fp_zero_operand (rtx);
+extern bool fp_one_operand (rtx);
+extern rtx get_fpscr_rtx (void);
+extern bool sh_legitimate_index_p (enum machine_mode, rtx, bool, bool);
+extern bool sh_legitimize_reload_address (rtx *, enum machine_mode, int, int);
+extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
+extern bool nonpic_symbol_mentioned_p (rtx);
+extern void emit_sf_insn (rtx);
+extern void emit_df_insn (rtx);
+extern void output_pic_addr_const (FILE *, rtx);
+extern bool expand_block_move (rtx *);
+extern void prepare_move_operands (rtx[], enum machine_mode mode);
+extern bool sh_expand_cmpstr (rtx *);
+extern bool sh_expand_cmpnstr (rtx *);
+extern bool sh_expand_strlen (rtx *);
+extern enum rtx_code prepare_cbranch_operands (rtx *, enum machine_mode mode,
+ enum rtx_code comparison);
+extern void expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int);
+extern bool expand_cbranchdi4 (rtx *operands, enum rtx_code comparison);
+extern void sh_emit_scc_to_t (enum rtx_code, rtx, rtx);
+extern rtx sh_emit_cheap_store_flag (enum machine_mode, enum rtx_code, rtx, rtx);
+extern void sh_emit_compare_and_branch (rtx *, enum machine_mode);
+extern void sh_emit_compare_and_set (rtx *, enum machine_mode);
+extern bool sh_ashlsi_clobbers_t_reg_p (rtx);
+extern bool sh_lshrsi_clobbers_t_reg_p (rtx);
+extern void gen_shifty_op (int, rtx *);
+extern void gen_shifty_hi_op (int, rtx *);
+extern bool expand_ashiftrt (rtx *);
+extern bool sh_dynamicalize_shift_p (rtx);
+extern int shl_and_kind (rtx, rtx, int *);
+extern int shl_and_length (rtx);
+extern int shl_and_scr_length (rtx);
+extern bool gen_shl_and (rtx, rtx, rtx, rtx);
+extern int shl_sext_kind (rtx, rtx, int *);
+extern int shl_sext_length (rtx);
+extern bool gen_shl_sext (rtx, rtx, rtx, rtx);
+extern rtx gen_datalabel_ref (rtx);
+extern int regs_used (rtx, int);
+extern void fixup_addr_diff_vecs (rtx);
+extern int get_dest_uid (rtx, int);
+extern void final_prescan_insn (rtx, rtx *, int);
+extern enum tls_model tls_symbolic_operand (rtx, enum machine_mode);
+extern bool system_reg_operand (rtx, enum machine_mode);
+extern bool reg_unused_after (rtx, rtx);
+extern void expand_sf_unop (rtx (*)(rtx, rtx, rtx), rtx *);
+extern void expand_sf_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *);
+extern void expand_df_unop (rtx (*)(rtx, rtx, rtx), rtx *);
+extern void expand_df_binop (rtx (*)(rtx, rtx, rtx, rtx), rtx *);
+extern int sh_insn_length_adjustment (rtx);
+extern bool sh_can_redirect_branch (rtx, rtx);
+extern void sh_expand_unop_v2sf (enum rtx_code, rtx, rtx);
+extern void sh_expand_binop_v2sf (enum rtx_code, rtx, rtx, rtx);
+extern bool sh_expand_t_scc (rtx *);
+extern rtx sh_gen_truncate (enum machine_mode, rtx, int);
+extern bool sh_vector_mode_supported_p (enum machine_mode);
+extern bool sh_cfun_trap_exit_p (void);
+extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem);
+extern int sh_eval_treg_value (rtx op);
+extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op);
+extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a);
+
+/* Result value of sh_find_set_of_reg. */
+struct set_of_reg
+{
+ /* The insn where sh_find_set_of_reg stopped looking.
+ Can be NULL_RTX if the end of the insn list was reached. */
+ rtx insn;
+
+ /* The set rtx of the specified reg if found, NULL_RTX otherwise. */
+ const_rtx set_rtx;
+
+ /* The set source rtx of the specified reg if found, NULL_RTX otherwise.
+ Usually, this is the most interesting return value. */
+ rtx set_src;
+};
+
+extern set_of_reg sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx));
+extern bool sh_is_logical_t_store_expr (rtx op, rtx insn);
+extern rtx sh_try_omit_signzero_extend (rtx extended_op, rtx insn);
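+/* Typical use of sh_find_set_of_reg (illustrative only): starting at INSN,
+   walk backwards with prev_nonnote_insn as the step function and inspect
+   the set source that was found, e.g.
+
+     set_of_reg s = sh_find_set_of_reg (reg, insn, prev_nonnote_insn);
+     if (s.set_src != NULL_RTX && CONST_INT_P (s.set_src))
+       ...use INTVAL (s.set_src)...
+*/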
+#endif /* RTX_CODE */
+
+extern void sh_cpu_cpp_builtins (cpp_reader* pfile);
+
+extern const char *output_jump_label_table (void);
+extern rtx get_t_reg_rtx (void);
+extern rtx get_fpscr_rtx (void);
+extern int sh_media_register_for_return (void);
+extern void sh_expand_prologue (void);
+extern void sh_expand_epilogue (bool);
+extern void sh_set_return_address (rtx, rtx);
+extern int initial_elimination_offset (int, int);
+extern bool fldi_ok (void);
+extern bool sh_hard_regno_rename_ok (unsigned int, unsigned int);
+extern bool sh_cfun_interrupt_handler_p (void);
+extern bool sh_cfun_resbank_handler_p (void);
+extern bool sh_attr_renesas_p (const_tree);
+extern bool sh_cfun_attr_renesas_p (void);
+extern bool sh_cannot_change_mode_class
+ (enum machine_mode, enum machine_mode, enum reg_class);
+extern bool sh_small_register_classes_for_mode_p (enum machine_mode);
+extern void sh_mark_label (rtx, int);
+extern bool check_use_sfunc_addr (rtx, rtx);
+
+#ifdef HARD_CONST
+extern void fpscr_set_from_mem (int, HARD_REG_SET);
+#endif
+
+extern void sh_pr_interrupt (struct cpp_reader *);
+extern void sh_pr_trapa (struct cpp_reader *);
+extern void sh_pr_nosave_low_regs (struct cpp_reader *);
+extern rtx function_symbol (rtx, const char *, enum sh_function_kind);
+extern rtx sh_get_pr_initial_val (void);
+
+extern void sh_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree,
+ signed int, enum machine_mode);
+extern rtx sh_dwarf_register_span (rtx);
+
+extern rtx replace_n_hard_rtx (rtx, rtx *, int , int);
+extern int shmedia_cleanup_truncate (rtx *, void *);
+
+extern bool sh_contains_memref_p (rtx);
+extern bool sh_loads_bankedreg_p (rtx);
+extern rtx shmedia_prepare_call_address (rtx fnaddr, int is_sibcall);
+extern int sh2a_get_function_vector_number (rtx);
+extern bool sh2a_is_function_vector_call (rtx);
+extern void sh_fix_range (const char *);
+extern bool sh_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern bool sh_can_use_simple_return_p (void);
+#endif /* ! GCC_SH_PROTOS_H */
diff --git a/gcc-4.9/gcc/config/sh/sh.c b/gcc-4.9/gcc/config/sh/sh.c
new file mode 100644
index 000000000..6d909c79e
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh.c
@@ -0,0 +1,13504 @@
+/* Output routines for GCC for Renesas / SuperH SH.
+ Copyright (C) 1993-2014 Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "insn-config.h"
+#include "rtl.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "varasm.h"
+#include "flags.h"
+#include "expr.h"
+#include "optabs.h"
+#include "reload.h"
+#include "function.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "diagnostic-core.h"
+#include "recog.h"
+#include "dwarf2.h"
+#include "tm_p.h"
+#include "target.h"
+#include "target-def.h"
+#include "langhooks.h"
+#include "basic-block.h"
+#include "df.h"
+#include "intl.h"
+#include "sched-int.h"
+#include "params.h"
+#include "ggc.h"
+#include "pointer-set.h"
+#include "hash-table.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "tree-eh.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "cfgloop.h"
+#include "alloc-pool.h"
+#include "tm-constrs.h"
+#include "opts.h"
+#include "tree-pass.h"
+#include "pass_manager.h"
+#include "context.h"
+
+#include <sstream>
+#include <vector>
+#include <algorithm>
+
+int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
+
+/* These are some macros to abstract register modes. */
+#define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 511)
+
+#define CONST_OK_FOR_ADD(size) \
+ (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
+#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
+#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
+#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
+
+/* Used to simplify the logic below. Find the attributes wherever
+ they may be. */
+#define SH_ATTRIBUTES(decl) \
+ (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
+ : DECL_ATTRIBUTES (decl) \
+ ? (DECL_ATTRIBUTES (decl)) \
+ : TYPE_ATTRIBUTES (TREE_TYPE (decl))
+
+/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
+int current_function_interrupt;
+
+tree sh_deferred_function_attributes;
+tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
+
+/* Global variables for machine-dependent things. */
+
+/* Which cpu are we scheduling for. */
+enum processor_type sh_cpu;
+
+/* Definitions used in ready queue reordering for first scheduling pass. */
+
+/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
+static short *regmode_weight[2];
+
+/* Total SFmode and SImode weights of scheduled insns. */
+static int curr_regmode_pressure[2];
+
+/* Number of r0 life regions. */
+static int r0_life_regions;
+
+/* If true, skip cycles for Q -> R movement. */
+static int skip_cycles = 0;
+
+/* Cached value of can_issue_more. This is cached in sh_variable_issue hook
+ and returned from sh_reorder2. */
+static short cached_can_issue_more;
+
+/* Unique number for UNSPEC_BBR pattern. */
+static unsigned int unspec_bbr_uid = 1;
+
+/* Provides the class number of the smallest class containing
+ reg number. */
+enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
+{
+ R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+ FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
+ TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
+ MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
+ GENERAL_REGS, GENERAL_REGS,
+};
+
+char sh_register_names[FIRST_PSEUDO_REGISTER] \
+ [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
+
+char sh_additional_register_names[ADDREGNAMES_SIZE] \
+ [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
+ = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
+
+int assembler_dialect;
+
+static bool shmedia_space_reserved_for_target_registers;
+
+static void split_branches (rtx);
+static int branch_dest (rtx);
+static void print_slot (rtx);
+static rtx add_constant (rtx, enum machine_mode, rtx);
+static void dump_table (rtx, rtx);
+static bool broken_move (rtx);
+static bool mova_p (rtx);
+static rtx find_barrier (int, rtx, rtx);
+static bool noncall_uses_reg (rtx, rtx, rtx *);
+static rtx gen_block_redirect (rtx, int, int);
+static void sh_reorg (void);
+static void sh_option_override (void);
+static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
+static rtx frame_insn (rtx);
+static rtx push (int);
+static void pop (int);
+static void push_regs (HARD_REG_SET *, int);
+static int calc_live_regs (HARD_REG_SET *);
+static HOST_WIDE_INT rounded_frame_size (int);
+static bool sh_frame_pointer_required (void);
+static rtx mark_constant_pool_use (rtx);
+static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
+ int, bool *);
+static tree sh_handle_resbank_handler_attribute (tree *, tree,
+ tree, int, bool *);
+static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
+ tree, int, bool *);
+static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
+static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
+static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
+static void sh_print_operand (FILE *, rtx, int);
+static void sh_print_operand_address (FILE *, rtx);
+static bool sh_print_operand_punct_valid_p (unsigned char code);
+static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
+static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static void sh_insert_attributes (tree, tree *);
+static const char *sh_check_pch_target_flags (int);
+static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
+static int sh_adjust_cost (rtx, rtx, rtx, int);
+static int sh_issue_rate (void);
+static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
+static short find_set_regmode_weight (rtx, enum machine_mode);
+static short find_insn_regmode_weight (rtx, enum machine_mode);
+static void find_regmode_weight (basic_block, enum machine_mode);
+static int find_r0_life_regions (basic_block);
+static void sh_md_init_global (FILE *, int, int);
+static void sh_md_finish_global (FILE *, int);
+static int rank_for_reorder (const void *, const void *);
+static void swap_reorder (rtx *, int);
+static void ready_reorder (rtx *, int);
+static bool high_pressure (enum machine_mode);
+static int sh_reorder (FILE *, int, rtx *, int *, int);
+static int sh_reorder2 (FILE *, int, rtx *, int *, int);
+static void sh_md_init (FILE *, int, int);
+static int sh_variable_issue (FILE *, int, rtx, int);
+
+static bool sh_function_ok_for_sibcall (tree, tree);
+
+static bool sh_cannot_modify_jumps_p (void);
+static reg_class_t sh_target_reg_class (void);
+static bool sh_optimize_target_register_callee_saved (bool);
+static bool sh_ms_bitfield_layout_p (const_tree);
+
+static void sh_init_builtins (void);
+static tree sh_builtin_decl (unsigned, bool);
+static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
+ HOST_WIDE_INT, tree);
+static void sh_file_start (void);
+static bool flow_dependent_p (rtx, rtx);
+static void flow_dependent_p_1 (rtx, const_rtx, void *);
+static int shiftcosts (rtx);
+static int and_xor_ior_costs (rtx, int);
+static int addsubcosts (rtx);
+static int multcosts (rtx);
+static bool unspec_caller_rtx_p (rtx);
+static bool sh_cannot_copy_insn_p (rtx);
+static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
+static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
+static int sh_pr_n_sets (void);
+static rtx sh_allocate_initial_value (rtx);
+static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
+static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ struct secondary_reload_info *);
+static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
+static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
+static rtx sh_delegitimize_address (rtx);
+static int shmedia_target_regs_stack_space (HARD_REG_SET *);
+static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
+static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
+static int scavenge_reg (HARD_REG_SET *s);
+struct save_schedule_s;
+static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
+ struct save_schedule_s *, int);
+
+static rtx sh_struct_value_rtx (tree, int);
+static rtx sh_function_value (const_tree, const_tree, bool);
+static bool sh_function_value_regno_p (const unsigned int);
+static rtx sh_libcall_value (enum machine_mode, const_rtx);
+static bool sh_return_in_memory (const_tree, const_tree);
+static rtx sh_builtin_saveregs (void);
+static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
+ tree, int *, int);
+static bool sh_strict_argument_naming (cumulative_args_t);
+static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
+static tree sh_build_builtin_va_list (void);
+static void sh_va_start (tree, rtx);
+static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
+static bool sh_promote_prototypes (const_tree);
+static enum machine_mode sh_promote_function_mode (const_tree type,
+ enum machine_mode,
+ int *punsignedp,
+ const_tree funtype,
+ int for_return);
+static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
+ const_tree, bool);
+static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
+ const_tree, bool);
+static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
+ tree, bool);
+static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
+ const_tree, bool);
+static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
+ const_tree, bool);
+static bool sh_scalar_mode_supported_p (enum machine_mode);
+static int sh_dwarf_calling_convention (const_tree);
+static void sh_encode_section_info (tree, rtx, int);
+static bool sh2a_function_vector_p (tree);
+static void sh_trampoline_init (rtx, tree, rtx);
+static rtx sh_trampoline_adjust_address (rtx);
+static void sh_conditional_register_usage (void);
+static bool sh_legitimate_constant_p (enum machine_mode, rtx);
+static int mov_insn_size (enum machine_mode, bool);
+static int mov_insn_alignment_mask (enum machine_mode, bool);
+static bool sequence_insn_p (rtx);
+static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
+static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
+ enum machine_mode, bool);
+static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
+
+static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
+
+static const struct attribute_spec sh_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+ affects_type_identity } */
+ { "interrupt_handler", 0, 0, true, false, false,
+ sh_handle_interrupt_handler_attribute, false },
+ { "sp_switch", 1, 1, true, false, false,
+ sh_handle_sp_switch_attribute, false },
+ { "trap_exit", 1, 1, true, false, false,
+ sh_handle_trap_exit_attribute, false },
+ { "renesas", 0, 0, false, true, false,
+ sh_handle_renesas_attribute, false },
+ { "trapa_handler", 0, 0, true, false, false,
+ sh_handle_interrupt_handler_attribute, false },
+ { "nosave_low_regs", 0, 0, true, false, false,
+ sh_handle_interrupt_handler_attribute, false },
+ { "resbank", 0, 0, true, false, false,
+ sh_handle_resbank_handler_attribute, false },
+ { "function_vector", 1, 1, true, false, false,
+ sh2a_handle_function_vector_handler_attribute, false },
+ { NULL, 0, 0, false, false, false, NULL, false }
+};
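+/* For illustration (not part of this file), the attributes above are used
+   in SH user code roughly as follows; the function and variable names and
+   the trap number are made up for the example:
+
+     extern void *alt_stack;
+     void isr (void) __attribute__ ((interrupt_handler));
+     void isr_alt (void)
+       __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));
+     void isr_trap (void)
+       __attribute__ ((interrupt_handler, trap_exit (12)));
+
+   sp_switch names the variable holding the alternate stack pointer and
+   trap_exit gives the trapa number used when returning from the handler.  */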
+
+/* Initialize the GCC target structure. */
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE sh_attribute_table
+
+/* The next two are used for debug info when compiling with -gdwarf. */
+#undef TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
+
+/* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE sh_option_override
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND sh_print_operand
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
+
+#undef TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
+ hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START sh_file_start
+#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
+#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
+
+#undef TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST sh_register_move_cost
+
+#undef TARGET_INSERT_ATTRIBUTES
+#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST sh_adjust_cost
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
+
+/* The following scheduler hooks have been implemented to re-enable sched1.
+   With their help we limit the movement of insns in sched1 in order to
+   reduce the register pressure.  The overall idea is to keep count of the
+   SImode and SFmode regs required by already scheduled insns.  When these
+   counts cross some threshold values, priority is given to insns that free
+   registers.  The insn that frees registers is most likely to be the insn
+   with the lowest LUID (original insn order); but such an insn might be
+   sitting in the stalled queue (Q) instead of the ready queue (R).  To
+   solve this, we skip cycles, up to a maximum of 8, so that such insns can
+   move from Q -> R.
+
+   The hooks are described below:
+
+   TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic scheduler;
+   it is called inside the sched_init function just after the
+   find_insn_reg_weights function call.  It is used to calculate the SImode
+   and SFmode weights of insns of basic blocks, much like
+   find_insn_reg_weights does.
+   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
+
+   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
+   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
+   (Q)->(R).
+
+   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
+   high, reorder the ready queue so that the insn with the lowest LUID will
+   be issued next.
+
+   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
+   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
+
+   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
+   can be returned from TARGET_SCHED_REORDER2.
+
+   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */
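+/* A stand-alone sketch (illustrative only, hence guarded out) of the
+   reordering idea described above: under high pressure the ready list is
+   sorted so that the insn with the lowest LUID, i.e. the earliest one in
+   the original insn order, is picked first, since it is the most likely
+   one to free a register.  The toy types and the threshold handling are
+   assumptions made for the example; they are not part of this file.  */
+#if 0
+struct toy_insn { int luid; };
+
+static int
+toy_rank_for_reorder (const void *a, const void *b)
+{
+  /* Lower LUID sorts first.  */
+  return ((const struct toy_insn *) a)->luid
+         - ((const struct toy_insn *) b)->luid;
+}
+
+static void
+toy_ready_reorder (struct toy_insn *ready, int n_ready,
+                   int pressure, int threshold)
+{
+  /* Only reorder when the tracked register pressure is high.  */
+  if (pressure > threshold)
+    qsort (ready, n_ready, sizeof (ready[0]), toy_rank_for_reorder);
+}
+#endif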
+
+#undef TARGET_SCHED_DFA_NEW_CYCLE
+#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
+
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
+
+#undef TARGET_SCHED_FINISH_GLOBAL
+#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
+
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER sh_reorder
+
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 sh_reorder2
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT sh_md_init
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
+
+#undef TARGET_CANNOT_MODIFY_JUMPS_P
+#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
+#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
+#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
+#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
+#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
+ sh_optimize_target_register_callee_saved
+
+#undef TARGET_MS_BITFIELD_LAYOUT_P
+#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS sh_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL sh_builtin_decl
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN sh_expand_builtin
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
+
+#undef TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS sh_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST sh_address_cost
+#undef TARGET_ALLOCATE_INITIAL_VALUE
+#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
+
+#undef TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+
+#undef TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
+
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE sh_function_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE sh_libcall_value
+#undef TARGET_STRUCT_VALUE_RTX
+#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY sh_return_in_memory
+
+#undef TARGET_EXPAND_BUILTIN_SAVEREGS
+#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
+#undef TARGET_STRICT_ARGUMENT_NAMING
+#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
+#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
+#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
+#undef TARGET_CALLEE_COPIES
+#define TARGET_CALLEE_COPIES sh_callee_copies
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG sh_function_arg
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
+
+#undef TARGET_CHECK_PCH_TARGET_FLAGS
+#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
+
+#undef TARGET_DWARF_CALLING_CONVENTION
+#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
+
+#undef TARGET_FRAME_POINTER_REQUIRED
+#define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
+
+/* Return regmode weight for insn. */
+#define INSN_REGMODE_WEIGHT(INSN, MODE)\
+ regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
+
+/* Return current register pressure for regmode. */
+#define CURR_REGMODE_PRESSURE(MODE)\
+ curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD sh_secondary_reload
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT sh_trampoline_init
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
+
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
+
+#undef TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
+
+/* Machine-specific symbol_ref flags. */
+#define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
+
+/* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
+ is used by optabs.c atomic op expansion code as well as in sync.md. */
+#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
+#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
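+/* A minimal stand-alone model (illustrative only, hence guarded out) of the
+   tas.b semantics assumed here: the byte is loaded, the T bit records
+   whether it was zero, and the byte is stored back with bit 7 (0x80) set.
+   After a successful test-and-set the location therefore holds 0x80, the
+   trueval that optabs.c compares against.  */
+#if 0
+static int
+model_tas_b (unsigned char *p)
+{
+  unsigned char old = *p;	/* Load the byte.  */
+  int t = (old == 0);		/* T bit: the byte was zero.  */
+  *p = old | 0x80;		/* Set bit 7 and store the byte back.  */
+  return t;
+}
+#endif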
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+
+/* Information on the currently selected atomic model.
+ This is initialized in sh_option_override. */
+static sh_atomic_model selected_atomic_model_;
+
+const sh_atomic_model&
+selected_atomic_model (void)
+{
+ return selected_atomic_model_;
+}
+
+static sh_atomic_model
+parse_validate_atomic_model_option (const char* str)
+{
+ const char* model_names[sh_atomic_model::num_models];
+ model_names[sh_atomic_model::none] = "none";
+ model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
+ model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
+ model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
+ model_names[sh_atomic_model::soft_imask] = "soft-imask";
+
+ const char* model_cdef_names[sh_atomic_model::num_models];
+ model_cdef_names[sh_atomic_model::none] = "NONE";
+ model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
+ model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
+ model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
+ model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
+
+ sh_atomic_model ret;
+ ret.type = sh_atomic_model::none;
+ ret.name = model_names[sh_atomic_model::none];
+ ret.cdef_name = model_cdef_names[sh_atomic_model::none];
+ ret.strict = false;
+ ret.tcb_gbr_offset = -1;
+
+ /* Handle empty string as 'none'. */
+ if (str == NULL || *str == '\0')
+ return ret;
+
+#define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
+
+ std::vector<std::string> tokens;
+ for (std::stringstream ss (str); ss.good (); )
+ {
+ tokens.push_back (std::string ());
+ std::getline (ss, tokens.back (), ',');
+ }
+
+ if (tokens.empty ())
+ err_ret ("invalid atomic model option");
+
+ /* The first token must be the atomic model name. */
+ {
+ for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
+ if (tokens.front () == model_names[i])
+ {
+ ret.type = (sh_atomic_model::enum_type)i;
+ ret.name = model_names[i];
+ ret.cdef_name = model_cdef_names[i];
+ goto got_mode_name;
+ }
+
+ err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
+got_mode_name:;
+ }
+
+ /* Go through the remaining tokens. */
+ for (size_t i = 1; i < tokens.size (); ++i)
+ {
+ if (tokens[i] == "strict")
+ ret.strict = true;
+ else if (tokens[i].find ("gbr-offset=") == 0)
+ {
+ std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
+ ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
+ if (offset_str.empty () || ret.tcb_gbr_offset == -1)
+ err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
+ "option", offset_str.c_str ());
+ }
+ else
+ err_ret ("unknown parameter \"%s\" in atomic model option",
+ tokens[i].c_str ());
+ }
+
+ /* Check that the selection makes sense. */
+ if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
+ err_ret ("atomic operations are not supported on SHmedia");
+
+ if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
+ err_ret ("atomic model %s is only available on SH3 and SH4 targets",
+ ret.name);
+
+ if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
+ err_ret ("atomic model %s is only available on SH4A targets", ret.name);
+
+ if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
+ err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
+
+ if (ret.type == sh_atomic_model::soft_tcb
+ && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
+ || (ret.tcb_gbr_offset & 3) != 0))
+ err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
+ "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
+ ret.name);
+
+ if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
+ err_ret ("cannot use atomic model %s in user mode", ret.name);
+
+ return ret;
+
+#undef err_ret
+}
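+/* For example (values made up for illustration), an option string such as
+   "soft-tcb,gbr-offset=16,strict" passed via -matomic-model= yields
+   type = soft_tcb, tcb_gbr_offset = 16 and strict = true, while an empty
+   string selects the "none" model.  */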
+
+/* Register SH specific RTL passes. */
+extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
+ const char* name);
+extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
+ const char* name);
+static void
+register_sh_passes (void)
+{
+ if (!TARGET_SH1)
+ return;
+
+/* Running the sh_treg_combine pass after ce1 generates better code when
+   comparisons are combined and reg-reg moves are introduced, because
+   reg-reg moves will be eliminated afterwards.  However, there are quite
+   a few cases where combine will be unable to fold comparison-related
+   insns, so for now this is not done.
+  register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
+		 PASS_POS_INSERT_AFTER, "ce1", 1);
+*/
+
+ /* Run sh_treg_combine pass after combine but before register allocation. */
+ register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
+ PASS_POS_INSERT_AFTER, "split1", 1);
+
+ /* Run sh_treg_combine pass after register allocation and basic block
+ reordering as this sometimes creates new opportunities. */
+ register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
+ PASS_POS_INSERT_AFTER, "split4", 1);
+
+ /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
+ is known after a conditional branch.
+ This must be done after basic blocks and branch conditions have
+ stabilized and won't be changed by further passes. */
+ register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
+ PASS_POS_INSERT_BEFORE, "sched2", 1);
+}
+
+/* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
+ various options, and do some machine dependent initialization. */
+static void
+sh_option_override (void)
+{
+ int regno;
+
+ SUBTARGET_OVERRIDE_OPTIONS;
+ if (optimize > 1 && !optimize_size)
+ target_flags |= MASK_SAVE_ALL_TARGET_REGS;
+
+ /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
+ TARGET_CBRANCHDI4 = 1;
+ TARGET_CMPEQDI_T = 0;
+
+ sh_cpu = PROCESSOR_SH1;
+ assembler_dialect = 0;
+ if (TARGET_SH2)
+ sh_cpu = PROCESSOR_SH2;
+ if (TARGET_SH2E)
+ sh_cpu = PROCESSOR_SH2E;
+ if (TARGET_SH2A)
+ sh_cpu = PROCESSOR_SH2A;
+ if (TARGET_SH3)
+ sh_cpu = PROCESSOR_SH3;
+ if (TARGET_SH3E)
+ sh_cpu = PROCESSOR_SH3E;
+ if (TARGET_SH4)
+ {
+ assembler_dialect = 1;
+ sh_cpu = PROCESSOR_SH4;
+ }
+ if (TARGET_SH4A_ARCH)
+ {
+ assembler_dialect = 1;
+ sh_cpu = PROCESSOR_SH4A;
+ }
+ if (TARGET_SH5)
+ {
+ sh_cpu = PROCESSOR_SH5;
+ target_flags |= MASK_ALIGN_DOUBLE;
+ if (TARGET_SHMEDIA_FPU)
+ target_flags |= MASK_FMOVD;
+ if (TARGET_SHMEDIA)
+ {
+ /* There are no delay slots on SHmedia. */
+ flag_delayed_branch = 0;
+ /* Relaxation isn't yet supported for SHmedia */
+ target_flags &= ~MASK_RELAX;
+ /* After reload, if conversion does little good but can cause
+ ICEs:
+ - find_if_block doesn't do anything for SH because we don't
+ have conditional execution patterns. (We use conditional
+ move patterns, which are handled differently, and only
+ before reload).
+ - find_cond_trap doesn't do anything for the SH because we
+ don't have conditional traps.
+ - find_if_case_1 uses redirect_edge_and_branch_force in
+ the only path that does an optimization, and this causes
+ an ICE when branch targets are in registers.
+ - find_if_case_2 doesn't do anything for the SHmedia after
+ reload except when it can redirect a tablejump - and
+ that's rather rare. */
+ flag_if_conversion2 = 0;
+ if (! strcmp (sh_div_str, "call"))
+ sh_div_strategy = SH_DIV_CALL;
+ else if (! strcmp (sh_div_str, "call2"))
+ sh_div_strategy = SH_DIV_CALL2;
+ if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
+ sh_div_strategy = SH_DIV_FP;
+ else if (! strcmp (sh_div_str, "inv"))
+ sh_div_strategy = SH_DIV_INV;
+ else if (! strcmp (sh_div_str, "inv:minlat"))
+ sh_div_strategy = SH_DIV_INV_MINLAT;
+ else if (! strcmp (sh_div_str, "inv20u"))
+ sh_div_strategy = SH_DIV_INV20U;
+ else if (! strcmp (sh_div_str, "inv20l"))
+ sh_div_strategy = SH_DIV_INV20L;
+ else if (! strcmp (sh_div_str, "inv:call2"))
+ sh_div_strategy = SH_DIV_INV_CALL2;
+ else if (! strcmp (sh_div_str, "inv:call"))
+ sh_div_strategy = SH_DIV_INV_CALL;
+ else if (! strcmp (sh_div_str, "inv:fp"))
+ {
+ if (TARGET_FPU_ANY)
+ sh_div_strategy = SH_DIV_INV_FP;
+ else
+ sh_div_strategy = SH_DIV_INV;
+ }
+ TARGET_CBRANCHDI4 = 0;
+ /* Assembler CFI isn't yet fully supported for SHmedia. */
+ flag_dwarf2_cfi_asm = 0;
+ }
+ }
+ else
+ {
+ /* Only the sh64-elf assembler fully supports .quad properly. */
+ targetm.asm_out.aligned_op.di = NULL;
+ targetm.asm_out.unaligned_op.di = NULL;
+ }
+ if (TARGET_SH1)
+ {
+ if (! strcmp (sh_div_str, "call-div1"))
+ sh_div_strategy = SH_DIV_CALL_DIV1;
+ else if (! strcmp (sh_div_str, "call-fp")
+ && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
+ || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
+ sh_div_strategy = SH_DIV_CALL_FP;
+ else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
+ sh_div_strategy = SH_DIV_CALL_TABLE;
+ else
+	/* Pick the one that makes the most sense for the target in
+	   general.  It is not very useful to use different functions
+	   depending on -Os, since then we would end up with two
+	   different functions when some of the code is compiled for
+	   size and some for speed.  */
+
+ /* SH4 tends to emphasize speed. */
+ if (TARGET_HARD_SH4)
+ sh_div_strategy = SH_DIV_CALL_TABLE;
+ /* These have their own way of doing things. */
+ else if (TARGET_SH2A)
+ sh_div_strategy = SH_DIV_INTRINSIC;
+ /* ??? Should we use the integer SHmedia function instead? */
+ else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
+ sh_div_strategy = SH_DIV_CALL_FP;
+ /* SH1 .. SH3 cores often go into small-footprint systems, so
+ default to the smallest implementation available. */
+ else
+ sh_div_strategy = SH_DIV_CALL_DIV1;
+ }
+ if (!TARGET_SH1)
+ TARGET_PRETEND_CMOVE = 0;
+ if (sh_divsi3_libfunc[0])
+ ; /* User supplied - leave it alone. */
+ else if (TARGET_DIVIDE_CALL_FP)
+ sh_divsi3_libfunc = "__sdivsi3_i4";
+ else if (TARGET_DIVIDE_CALL_TABLE)
+ sh_divsi3_libfunc = "__sdivsi3_i4i";
+ else if (TARGET_SH5)
+ sh_divsi3_libfunc = "__sdivsi3_1";
+ else
+ sh_divsi3_libfunc = "__sdivsi3";
+ if (sh_branch_cost == -1)
+ {
+ sh_branch_cost = 1;
+
+ /* The SH1 does not have delay slots, hence we get a pipeline stall
+ at every branch. The SH4 is superscalar, so the single delay slot
+ is not sufficient to keep both pipelines filled. */
+ if (! TARGET_SH2 || TARGET_HARD_SH4)
+ sh_branch_cost = 2;
+ }
+
+ /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
+ if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
+ TARGET_ZDCBRANCH = 1;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (! VALID_REGISTER_P (regno))
+ sh_register_names[regno][0] = '\0';
+
+ for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
+ if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
+ sh_additional_register_names[regno][0] = '\0';
+
+ if ((flag_pic && ! TARGET_PREFERGOT)
+ || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
+ flag_no_function_cse = 1;
+
+ if (targetm.small_register_classes_for_mode_p (VOIDmode))
+ {
+      /* Never run scheduling before reload, since that can break
+	 global alloc and in any case generates slower code due to
+	 the pressure on R0.  */
+      /* Enable sched1 for SH4 only if the user explicitly requests it.
+	 When sched1 is enabled, the ready queue will be reordered by
+	 the target hooks if pressure is high.  We cannot do this for
+	 PIC, SH3 and lower, as they give spill failures for R0.  */
+ if (!TARGET_HARD_SH4 || flag_pic)
+ flag_schedule_insns = 0;
+      /* ??? Current exception handling places basic block boundaries
+	 after call_insns.  This causes high pressure on R0 and leads to
+	 spill failures for R0 in reload.  See PR 22553 and the thread
+	 on gcc-patches
+	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
+ else if (flag_exceptions)
+ {
+ if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
+ warning (0, "ignoring -fschedule-insns because of exception "
+ "handling bug");
+ flag_schedule_insns = 0;
+ }
+ else if (flag_schedule_insns
+ && !global_options_set.x_flag_schedule_insns)
+ flag_schedule_insns = 0;
+ }
+
+ /* Unwind info is not correct around the CFG unless either a frame
+ pointer is present or M_A_O_A is set. Fixing this requires rewriting
+ unwind info generation to be aware of the CFG and propagating states
+ around edges. */
+ if ((flag_unwind_tables || flag_asynchronous_unwind_tables
+ || flag_exceptions || flag_non_call_exceptions)
+ && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
+ {
+ warning (0, "unwind tables currently require either a frame pointer "
+ "or -maccumulate-outgoing-args for correctness");
+ TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
+ }
+
+ /* Unwinding with -freorder-blocks-and-partition does not work on this
+ architecture, because it requires far jumps to label crossing between
+ hot/cold sections which are rejected on this architecture. */
+ if (flag_reorder_blocks_and_partition)
+ {
+ if (flag_exceptions)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not work with "
+ "exceptions on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+ else if (flag_unwind_tables)
+ {
+ inform (input_location,
+ "-freorder-blocks-and-partition does not support unwind "
+ "info on this architecture");
+ flag_reorder_blocks_and_partition = 0;
+ flag_reorder_blocks = 1;
+ }
+ }
+
+ /* Adjust loop, jump and function alignment values (in bytes), if those
+ were not specified by the user using -falign-loops, -falign-jumps
+ and -falign-functions options.
+ 32 bit alignment is better for speed, because instructions can be
+ fetched as a pair from a longword boundary. For size use 16 bit
+ alignment to get more compact code.
+ Aligning all jumps increases the code size, even if it might
+ result in slightly faster code. Thus, it is set to the smallest
+ alignment possible if not specified by the user. */
+ if (align_loops == 0)
+ {
+ if (TARGET_SH5)
+ align_loops = 8;
+ else
+ align_loops = optimize_size ? 2 : 4;
+ }
+
+ if (align_jumps == 0)
+ {
+ if (TARGET_SHMEDIA)
+ align_jumps = 1 << CACHE_LOG;
+ else
+ align_jumps = 2;
+ }
+ else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
+ align_jumps = TARGET_SHMEDIA ? 4 : 2;
+
+ if (align_functions == 0)
+ {
+ if (TARGET_SHMEDIA)
+ align_functions = optimize_size
+ ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
+ else
+ align_functions = optimize_size ? 2 : 4;
+ }
+
+ /* The linker relaxation code breaks when a function contains
+ alignments that are larger than that at the start of a
+ compilation unit. */
+ if (TARGET_RELAX)
+ {
+ int min_align = align_loops > align_jumps ? align_loops : align_jumps;
+
+ /* Also take possible .long constants / mova tables into account. */
+ if (min_align < 4)
+ min_align = 4;
+ if (align_functions < min_align)
+ align_functions = min_align;
+ }
+
+ if (flag_unsafe_math_optimizations)
+ {
+ /* Enable fsca insn for SH4A if not otherwise specified by the user. */
+ if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
+ TARGET_FSCA = 1;
+
+ /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
+ if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
+ TARGET_FSRRA = 1;
+ }
+
+  /* Allow the fsrra insn only if both -funsafe-math-optimizations and
+     -ffinite-math-only are enabled.  */
+ TARGET_FSRRA = TARGET_FSRRA
+ && flag_unsafe_math_optimizations
+ && flag_finite_math_only;
+
+ /* If the -mieee option was not explicitly set by the user, turn it on
+ unless -ffinite-math-only was specified. See also PR 33135. */
+ if (! global_options_set.x_TARGET_IEEE)
+ TARGET_IEEE = ! flag_finite_math_only;
+
+ if (sh_fixed_range_str)
+ sh_fix_range (sh_fixed_range_str);
+
+ /* This target defaults to strict volatile bitfields. */
+ if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
+ flag_strict_volatile_bitfields = 1;
+
+ /* Parse atomic model option and make sure it is valid for the current
+ target CPU. */
+ selected_atomic_model_
+ = parse_validate_atomic_model_option (sh_atomic_model_str);
+
+ register_sh_passes ();
+}
+
+/* Print the operand address in x to the stream. */
+static void
+sh_print_operand_address (FILE *stream, rtx x)
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ fprintf (stream, "@%s", reg_names[true_regnum (x)]);
+ break;
+
+ case PLUS:
+ {
+ rtx base = XEXP (x, 0);
+ rtx index = XEXP (x, 1);
+
+ switch (GET_CODE (index))
+ {
+ case CONST_INT:
+ fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
+ reg_names[true_regnum (base)]);
+ break;
+
+ case REG:
+ case SUBREG:
+ {
+ int base_num = true_regnum (base);
+ int index_num = true_regnum (index);
+
+ fprintf (stream, "@(r0,%s)",
+ reg_names[MAX (base_num, index_num)]);
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ break;
+
+ case PRE_DEC:
+ fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
+ break;
+
+ case POST_INC:
+ fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
+ break;
+
+ default:
+ x = mark_constant_pool_use (x);
+ output_addr_const (stream, x);
+ break;
+ }
+}
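+/* Worked examples for the cases above (illustrative register numbers):
+   a plain (reg r4) address prints as "@r4"; (plus (reg r4) (const_int 8))
+   prints as "@(8,r4)"; (plus (reg r4) (reg r0)) prints as "@(r0,r4)";
+   (pre_dec (reg r15)) prints as "@-r15"; (post_inc (reg r4)) prints as
+   "@r4+".  */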
+
+/* Print operand x (an rtx) in assembler syntax to file stream
+ according to modifier code.
+
+ '.' print a .s if insn needs delay slot
+ ',' print LOCAL_LABEL_PREFIX
+   '@'  print trapa, rte or rts depending on the function's interrupt
+        and trap_exit attributes
+ '#' output a nop if there is nothing to put in the delay slot
+ ''' print likelihood suffix (/u for unlikely).
+ '>' print branch target if -fverbose-asm
+ 'O' print a constant without the #
+ 'R' print the LSW of a dp value - changes if in little endian
+ 'S' print the MSW of a dp value - changes if in little endian
+ 'T' print the next word of a dp value - same as 'R' in big endian mode.
+ 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
+ otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
+ 'N' print 'r63' if the operand is (const_int 0).
+ 'd' print a V2SF reg as dN instead of fpN.
+ 'm' print a pair `base,offset' or `base,index', for LD and ST.
+ 'U' Likewise for {LD,ST}{HI,LO}.
+ 'V' print the position of a single bit set.
+ 'W' print the position of a single bit cleared.
+ 't' print a memory address which is a register.
+ 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
+ 'o' output an operator. */
+static void
+sh_print_operand (FILE *stream, rtx x, int code)
+{
+ int regno;
+ enum machine_mode mode;
+
+ switch (code)
+ {
+ tree trapa_attr;
+
+ case '.':
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
+ && get_attr_length (XVECEXP (final_sequence, 0, 1)))
+ fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
+ break;
+ case ',':
+ fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
+ break;
+ case '@':
+ trapa_attr = lookup_attribute ("trap_exit",
+ DECL_ATTRIBUTES (current_function_decl));
+ if (trapa_attr)
+ fprintf (stream, "trapa #%ld",
+ (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
+ else if (sh_cfun_interrupt_handler_p ())
+ {
+ if (sh_cfun_resbank_handler_p ())
+ fprintf (stream, "resbank\n");
+ fprintf (stream, "rte");
+ }
+ else
+ fprintf (stream, "rts");
+ break;
+ case '#':
+ /* Output a nop if there's nothing in the delay slot. */
+ if (dbr_sequence_length () == 0)
+ fprintf (stream, "\n\tnop");
+ break;
+ case '\'':
+ {
+ rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+
+ if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
+ fputs ("/u", stream);
+ break;
+ }
+ case '>':
+ if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
+ {
+ fputs ("\t! target: ", stream);
+ output_addr_const (stream, JUMP_LABEL (current_output_insn));
+ }
+ break;
+ case 'O':
+ x = mark_constant_pool_use (x);
+ output_addr_const (stream, x);
+ break;
+    /* N.B.: %R / %S / %T adjust memory addresses by four.
+       For SHMEDIA, that means they can be used to access the first and
+       second 32 bit parts of a 64 bit (or larger) value that
+       might be held in floating point registers or memory.
+       While they can also be used to access the 64 bit parts of a larger
+       value held in general purpose registers, that won't work with
+       memory, nor with fp registers, since the frxx names are used.  */
+ case 'R':
+ if (REG_P (x) || GET_CODE (x) == SUBREG)
+ {
+ regno = true_regnum (x);
+ regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
+ fputs (reg_names[regno], (stream));
+ }
+ else if (MEM_P (x))
+ {
+ x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
+ sh_print_operand_address (stream, XEXP (x, 0));
+ }
+ else
+ {
+ rtx sub = NULL_RTX;
+
+ mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ mode = DImode;
+ if (GET_MODE_SIZE (mode) >= 8)
+ sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
+ if (sub)
+ sh_print_operand (stream, sub, 0);
+ else
+ output_operand_lossage ("invalid operand to %%R");
+ }
+ break;
+ case 'S':
+ if (REG_P (x) || GET_CODE (x) == SUBREG)
+ {
+ regno = true_regnum (x);
+ regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
+ fputs (reg_names[regno], (stream));
+ }
+ else if (MEM_P (x))
+ {
+ x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
+ sh_print_operand_address (stream, XEXP (x, 0));
+ }
+ else
+ {
+ rtx sub = NULL_RTX;
+
+ mode = GET_MODE (x);
+ if (mode == VOIDmode)
+ mode = DImode;
+ if (GET_MODE_SIZE (mode) >= 8)
+ sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
+ if (sub)
+ sh_print_operand (stream, sub, 0);
+ else
+ output_operand_lossage ("invalid operand to %%S");
+ }
+ break;
+ case 'T':
+ /* Next word of a double. */
+ switch (GET_CODE (x))
+ {
+ case REG:
+ fputs (reg_names[REGNO (x) + 1], (stream));
+ break;
+ case MEM:
+ if (GET_CODE (XEXP (x, 0)) != PRE_DEC
+ && GET_CODE (XEXP (x, 0)) != POST_INC)
+ x = adjust_address (x, SImode, 4);
+ sh_print_operand_address (stream, XEXP (x, 0));
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case 't':
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ sh_print_operand (stream, x, 0);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case 'o':
+ switch (GET_CODE (x))
+ {
+ case PLUS: fputs ("add", stream); break;
+ case MINUS: fputs ("sub", stream); break;
+ case MULT: fputs ("mul", stream); break;
+ case DIV: fputs ("div", stream); break;
+ case EQ: fputs ("eq", stream); break;
+ case NE: fputs ("ne", stream); break;
+ case GT: case LT: fputs ("gt", stream); break;
+ case GE: case LE: fputs ("ge", stream); break;
+ case GTU: case LTU: fputs ("gtu", stream); break;
+ case GEU: case LEU: fputs ("geu", stream); break;
+ default:
+ break;
+ }
+ break;
+ case 'M':
+ if (TARGET_SHMEDIA)
+ {
+ if (MEM_P (x)
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && (REG_P (XEXP (XEXP (x, 0), 1))
+ || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
+ fputc ('x', stream);
+ }
+ else
+ {
+ if (MEM_P (x))
+ {
+ switch (GET_MODE (x))
+ {
+ case QImode: fputs (".b", stream); break;
+ case HImode: fputs (".w", stream); break;
+ case SImode: fputs (".l", stream); break;
+ case SFmode: fputs (".s", stream); break;
+ case DFmode: fputs (".d", stream); break;
+ default: gcc_unreachable ();
+ }
+ }
+ }
+ break;
+
+ case 'm':
+ gcc_assert (MEM_P (x));
+ x = XEXP (x, 0);
+ /* Fall through. */
+ case 'U':
+ switch (GET_CODE (x))
+ {
+ case REG:
+ case SUBREG:
+ sh_print_operand (stream, x, 0);
+ fputs (", 0", stream);
+ break;
+
+ case PLUS:
+ sh_print_operand (stream, XEXP (x, 0), 0);
+ fputs (", ", stream);
+ sh_print_operand (stream, XEXP (x, 1), 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case 'V':
+ {
+ int num = exact_log2 (INTVAL (x));
+ gcc_assert (num >= 0);
+ fprintf (stream, "#%d", num);
+ }
+ break;
+
+ case 'W':
+ {
+ int num = exact_log2 (~INTVAL (x));
+ gcc_assert (num >= 0);
+ fprintf (stream, "#%d", num);
+ }
+ break;
+
+ case 'd':
+ gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
+
+ fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
+ break;
+
+ case 'N':
+ if (x == CONST0_RTX (GET_MODE (x)))
+ {
+ fprintf ((stream), "r63");
+ break;
+ }
+ goto default_output;
+ case 'u':
+ if (CONST_INT_P (x))
+ {
+ fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
+ break;
+ }
+ /* Fall through. */
+
+ default_output:
+ default:
+ regno = 0;
+ mode = GET_MODE (x);
+
+ switch (GET_CODE (x))
+ {
+ case TRUNCATE:
+ {
+ rtx inner = XEXP (x, 0);
+ int offset = 0;
+ enum machine_mode inner_mode;
+
+ /* We might see SUBREGs with vector mode registers inside. */
+ if (GET_CODE (inner) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (inner))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
+ && subreg_lowpart_p (inner))
+ inner = SUBREG_REG (inner);
+ if (CONST_INT_P (inner))
+ {
+ x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
+ goto default_output;
+ }
+ inner_mode = GET_MODE (inner);
+ if (GET_CODE (inner) == SUBREG
+ && (GET_MODE_SIZE (GET_MODE (inner))
+ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
+ && REG_P (SUBREG_REG (inner)))
+ {
+ offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
+ GET_MODE (SUBREG_REG (inner)),
+ SUBREG_BYTE (inner),
+ GET_MODE (inner));
+ inner = SUBREG_REG (inner);
+ }
+ if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
+ abort ();
+ /* Floating point register pairs are always big endian;
+ general purpose registers are 64 bit wide. */
+ regno = REGNO (inner);
+ regno = (HARD_REGNO_NREGS (regno, inner_mode)
+ - HARD_REGNO_NREGS (regno, mode))
+ + offset;
+ x = inner;
+ goto reg;
+ }
+ case SIGN_EXTEND:
+ x = XEXP (x, 0);
+ goto reg;
+ /* FIXME: We need this on SHmedia32 because reload generates
+ some sign-extended HI or QI loads into DImode registers
+ but, because Pmode is SImode, the address ends up with a
+ subreg:SI of the DImode register. Maybe reload should be
+ fixed so as to apply alter_subreg to such loads? */
+ case IF_THEN_ELSE:
+ gcc_assert (trapping_target_operand (x, VOIDmode));
+ x = XEXP (XEXP (x, 2), 0);
+ goto default_output;
+ case SUBREG:
+ gcc_assert (SUBREG_BYTE (x) == 0
+ && REG_P (SUBREG_REG (x)));
+
+ x = SUBREG_REG (x);
+ /* Fall through. */
+
+ reg:
+ case REG:
+ regno += REGNO (x);
+ if (FP_REGISTER_P (regno)
+ && mode == V16SFmode)
+ fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
+ else if (FP_REGISTER_P (REGNO (x))
+ && mode == V4SFmode)
+ fprintf ((stream), "fv%s", reg_names[regno] + 2);
+ else if (REG_P (x)
+ && mode == V2SFmode)
+ fprintf ((stream), "fp%s", reg_names[regno] + 2);
+ else if (FP_REGISTER_P (REGNO (x))
+ && GET_MODE_SIZE (mode) > 4)
+ fprintf ((stream), "d%s", reg_names[regno] + 1);
+ else
+ fputs (reg_names[regno], (stream));
+ break;
+
+ case MEM:
+ output_address (XEXP (x, 0));
+ break;
+
+ default:
+ if (TARGET_SH1)
+ fputc ('#', stream);
+ output_addr_const (stream, x);
+ break;
+ }
+ break;
+ }
+}
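+/* Worked examples for two of the modifiers above: with x = (const_int 32)
+   the 'V' case prints "#5", since exact_log2 (32) is 5; with
+   x = (const_int -2) the 'u' case prints "65534", the lowest 16 bits of
+   the value interpreted as unsigned.  */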
+
+static bool
+sh_print_operand_punct_valid_p (unsigned char code)
+{
+ return (code == '.' || code == '#' || code == '@' || code == ','
+ || code == '$' || code == '\'' || code == '>');
+}
+
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
+static bool
+sh_asm_output_addr_const_extra (FILE *file, rtx x)
+{
+ if (GET_CODE (x) == UNSPEC)
+ {
+ switch (XINT (x, 1))
+ {
+ case UNSPEC_DATALABEL:
+ fputs ("datalabel ", file);
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ break;
+ case UNSPEC_PIC:
+ /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ break;
+ case UNSPEC_GOT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOT", file);
+ break;
+ case UNSPEC_GOTOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTOFF", file);
+ break;
+ case UNSPEC_PLT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@PLT", file);
+ break;
+ case UNSPEC_GOTPLT:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTPLT", file);
+ break;
+ case UNSPEC_DTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@DTPOFF", file);
+ break;
+ case UNSPEC_GOTTPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@GOTTPOFF", file);
+ break;
+ case UNSPEC_TPOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("@TPOFF", file);
+ break;
+ case UNSPEC_CALLER:
+ {
+ char name[32];
+ /* LPCS stands for Label for PIC Call Site. */
+ targetm.asm_out.generate_internal_label (name, "LPCS",
+ INTVAL (XVECEXP (x, 0, 0)));
+ assemble_name (file, name);
+ }
+ break;
+ case UNSPEC_EXTRACT_S16:
+ case UNSPEC_EXTRACT_U16:
+ {
+ rtx val, shift;
+
+ val = XVECEXP (x, 0, 0);
+ shift = XVECEXP (x, 0, 1);
+ fputc ('(', file);
+ if (shift != const0_rtx)
+ fputc ('(', file);
+ if (GET_CODE (val) == CONST
+ || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
+ {
+ fputc ('(', file);
+ output_addr_const (file, val);
+ fputc (')', file);
+ }
+ else
+ output_addr_const (file, val);
+ if (shift != const0_rtx)
+ {
+ fputs (" >> ", file);
+ output_addr_const (file, shift);
+ fputc (')', file);
+ }
+ fputs (" & 65535)", file);
+ }
+ break;
+ case UNSPEC_SYMOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputc ('-', file);
+ if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
+ {
+ fputc ('(', file);
+ output_addr_const (file, XVECEXP (x, 0, 1));
+ fputc (')', file);
+ }
+ else
+ output_addr_const (file, XVECEXP (x, 0, 1));
+ break;
+ case UNSPEC_PCREL_SYMOFF:
+ output_addr_const (file, XVECEXP (x, 0, 0));
+ fputs ("-(", file);
+ output_addr_const (file, XVECEXP (x, 0, 1));
+ fputs ("-.)", file);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Encode symbol attributes of a SYMBOL_REF into its
+ SYMBOL_REF_FLAGS. */
+static void
+sh_encode_section_info (tree decl, rtx rtl, int first)
+{
+ default_encode_section_info (decl, rtl, first);
+
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && sh2a_function_vector_p (decl) && TARGET_SH2A)
+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
+}
+
+/* Prepare operands for a move define_expand; specifically, one of the
+ operands must be in a register. */
+void
+prepare_move_operands (rtx operands[], enum machine_mode mode)
+{
+ if ((mode == SImode || mode == DImode)
+ && flag_pic
+ && ! ((mode == Pmode || mode == ptr_mode)
+ && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
+ {
+ rtx temp;
+ if (SYMBOLIC_CONST_P (operands[1]))
+ {
+ if (MEM_P (operands[0]))
+ operands[1] = force_reg (Pmode, operands[1]);
+ else if (TARGET_SHMEDIA
+ && GET_CODE (operands[1]) == LABEL_REF
+ && target_reg_operand (operands[0], mode))
+ /* It's ok. */;
+ else
+ {
+ temp = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (Pmode));
+ operands[1] = legitimize_pic_address (operands[1], mode, temp);
+ }
+ }
+ else if (GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == PLUS
+ && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
+ {
+ temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
+ mode, temp);
+ operands[1] = expand_binop (mode, add_optab, temp,
+ XEXP (XEXP (operands[1], 0), 1),
+ (!can_create_pseudo_p ()
+ ? temp
+ : gen_reg_rtx (Pmode)),
+ 0, OPTAB_LIB_WIDEN);
+ }
+ }
+
+ if (! reload_in_progress && ! reload_completed)
+ {
+ /* Copy the source to a register if neither operand is a register. */
+ if (! register_operand (operands[0], mode)
+ && ! sh_register_operand (operands[1], mode))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+
+ if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
+ {
+ /* This is like change_address_1 (operands[0], mode, 0, 1) ,
+ except that we can't use that function because it is static. */
+ rtx new_rtx = change_address (operands[0], mode, 0);
+ MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
+ operands[0] = new_rtx;
+ }
+
+ /* This case can happen while generating code to move the result
+ of a library call to the target. Reject `st r0,@(rX,rY)' because
+ reload will fail to find a spill register for rX, since r0 is already
+ being used for the source. */
+ else if (TARGET_SH1
+ && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && REG_P (XEXP (XEXP (operands[0], 0), 1)))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+ }
+
+ if (mode == Pmode || mode == ptr_mode)
+ {
+ rtx op0, op1, opc;
+ enum tls_model tls_kind;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ if (GET_CODE (op1) == CONST
+ && GET_CODE (XEXP (op1, 0)) == PLUS
+ && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
+ != TLS_MODEL_NONE))
+ {
+ opc = XEXP (XEXP (op1, 0), 1);
+ op1 = XEXP (XEXP (op1, 0), 0);
+ }
+ else
+ opc = NULL_RTX;
+
+ if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
+ {
+ rtx tga_op1, tga_ret, tmp, tmp2;
+
+ if (! flag_pic
+ && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
+ || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
+ || tls_kind == TLS_MODEL_INITIAL_EXEC))
+ {
+ /* Don't schedule insns for getting GOT address when
+ the first scheduling is enabled, to avoid spill
+ failures for R0. */
+ if (flag_schedule_insns)
+ emit_insn (gen_blockage ());
+ emit_insn (gen_GOTaddr2picreg ());
+ emit_use (gen_rtx_REG (SImode, PIC_REG));
+ if (flag_schedule_insns)
+ emit_insn (gen_blockage ());
+ }
+
+ switch (tls_kind)
+ {
+ case TLS_MODEL_GLOBAL_DYNAMIC:
+ tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
+ tmp = gen_reg_rtx (Pmode);
+ emit_move_insn (tmp, tga_ret);
+ op1 = tmp;
+ break;
+
+ case TLS_MODEL_LOCAL_DYNAMIC:
+ tga_ret = gen_rtx_REG (Pmode, R0_REG);
+ emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
+
+ tmp = gen_reg_rtx (Pmode);
+ emit_move_insn (tmp, tga_ret);
+
+ if (register_operand (op0, Pmode))
+ tmp2 = op0;
+ else
+ tmp2 = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
+ op1 = tmp2;
+ break;
+
+ case TLS_MODEL_INITIAL_EXEC:
+ tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
+ tmp = gen_sym2GOTTPOFF (op1);
+ emit_insn (gen_tls_initial_exec (tga_op1, tmp));
+ op1 = tga_op1;
+ break;
+
+ case TLS_MODEL_LOCAL_EXEC:
+ tmp2 = gen_reg_rtx (Pmode);
+ emit_insn (gen_store_gbr (tmp2));
+ tmp = gen_reg_rtx (Pmode);
+ emit_insn (gen_symTPOFF2reg (tmp, op1));
+
+ if (register_operand (op0, Pmode))
+ op1 = op0;
+ else
+ op1 = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_addsi3 (op1, tmp, tmp2));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ if (opc)
+ emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
+ operands[1] = op1;
+ }
+ }
+}
+
+/* Implement the canonicalize_comparison target hook for the combine
+ pass. For the target hook this function is invoked via
+ sh_canonicalize_comparison. This function is also re-used to
+ canonicalize comparisons in cbranch pattern expanders. */
+static void
+sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
+ enum machine_mode mode,
+ bool op0_preserve_value)
+{
+ /* When invoked from within the combine pass the mode is not specified,
+ so try to get it from one of the operands. */
+ if (mode == VOIDmode)
+ mode = GET_MODE (op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op1);
+
+ // We need to have a mode to do something useful here.
+ if (mode == VOIDmode)
+ return;
+
+ // Currently, we don't deal with floats here.
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ return;
+
+ // Make sure that the constant operand is the second operand.
+ if (CONST_INT_P (op0) && !CONST_INT_P (op1))
+ {
+ if (op0_preserve_value)
+ return;
+
+ std::swap (op0, op1);
+ cmp = swap_condition (cmp);
+ }
+
+ if (CONST_INT_P (op1))
+ {
+ /* Try to adjust the constant operand in such a way that available
+ comparison insns can be utilized better and the constant can be
+ loaded with a 'mov #imm,Rm' insn. This avoids a load from the
+ constant pool. */
+ const HOST_WIDE_INT val = INTVAL (op1);
+
+ /* x > -1 --> x >= 0
+ x > 0xFFFFFF7F --> x >= 0xFFFFFF80
+ x <= -1 --> x < 0
+ x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
+ if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
+ {
+ cmp = cmp == GT ? GE : LT;
+ op1 = gen_int_mode (val + 1, mode);
+ }
+
+ /* x >= 1 --> x > 0
+ x >= 0x80 --> x > 0x7F
+ x < 1 --> x <= 0
+ x < 0x80 --> x <= 0x7F */
+ else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
+ {
+ cmp = cmp == GE ? GT : LE;
+ op1 = gen_int_mode (val - 1, mode);
+ }
+
+ /* unsigned x >= 1 --> x != 0
+ unsigned x < 1 --> x == 0 */
+ else if (val == 1 && (cmp == GEU || cmp == LTU))
+ {
+ cmp = cmp == GEU ? NE : EQ;
+ op1 = CONST0_RTX (mode);
+ }
+
+ /* unsigned x >= 0x80 --> unsigned x > 0x7F
+ unsigned x < 0x80 --> unsigned x < 0x7F */
+ else if (val == 0x80 && (cmp == GEU || cmp == LTU))
+ {
+ cmp = cmp == GEU ? GTU : LEU;
+ op1 = gen_int_mode (val - 1, mode);
+ }
+
+ /* unsigned x > 0 --> x != 0
+ unsigned x <= 0 --> x == 0 */
+ else if (val == 0 && (cmp == GTU || cmp == LEU))
+ cmp = cmp == GTU ? NE : EQ;
+
+ /* unsigned x > 0x7FFFFFFF --> signed x < 0
+ unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
+ else if (mode == SImode && (cmp == GTU || cmp == LEU)
+ && val == 0x7FFFFFFF)
+ {
+ cmp = cmp == GTU ? LT : GE;
+ op1 = const0_rtx;
+ }
+
+ /* unsigned x >= 0x80000000 --> signed x < 0
+ unsigned x < 0x80000000 --> signed x >= 0 */
+ else if (mode == SImode && (cmp == GEU || cmp == LTU)
+ && (unsigned HOST_WIDE_INT)val
+ == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
+ {
+ cmp = cmp == GEU ? LT : GE;
+ op1 = const0_rtx;
+ }
+ }
+}
+
+/* This function implements the canonicalize_comparison target hook.
+ This wrapper around the internally used sh_canonicalize_comparison
+ function is needed to do the enum rtx_code <-> int conversion.
+ Target hooks cannot use enum rtx_code in their definitions. */
+static void
+sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
+{
+ enum rtx_code tmp_code = (enum rtx_code)*code;
+ sh_canonicalize_comparison (tmp_code, *op0, *op1,
+ VOIDmode, op0_preserve_value);
+ *code = (int)tmp_code;
+}
+
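+/* Implement the fixed_condition_code_regs target hook: the T bit register is
+ the only condition code register on SH; there is no second one. */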
+bool
+sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
+{
+ *p1 = T_REG;
+ *p2 = INVALID_REGNUM;
+ return true;
+}
+
+enum rtx_code
+prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
+ enum rtx_code comparison)
+{
+ /* The scratch reg is only available when this is invoked from within
+ the cbranchdi4_i splitter, through expand_cbranchdi4. */
+ rtx scratch = NULL_RTX;
+
+ if (comparison == LAST_AND_UNUSED_RTX_CODE)
+ comparison = GET_CODE (operands[0]);
+ else
+ scratch = operands[4];
+
+ sh_canonicalize_comparison (comparison, operands[1], operands[2],
+ mode, false);
+
+ /* Notice that this function is also invoked after reload by
+ the cbranchdi4_i pattern, through expand_cbranchdi4. */
+ rtx op1 = operands[1];
+
+ if (can_create_pseudo_p ())
+ operands[1] = force_reg (mode, op1);
+ /* When we are handling DImode comparisons, we want to keep constants so
+ that we can optimize the component comparisons; however, memory loads
+ are better issued as a whole so that they can be scheduled well.
+ SImode equality comparisons allow I08 constants, but only when they
+ compare r0. Hence, if operands[1] has to be loaded from somewhere else
+ into a register, that register might as well be r0, and we allow the
+ constant. If it is already in a register, this is likely to be
+ allocated to a different hard register, thus we load the constant into
+ a register unless it is zero. */
+ if (!REG_P (operands[2])
+ && (!CONST_INT_P (operands[2])
+ || (mode == SImode && operands[2] != CONST0_RTX (SImode)
+ && ((comparison != EQ && comparison != NE)
+ || (REG_P (op1) && REGNO (op1) != R0_REG)
+ || !satisfies_constraint_I08 (operands[2])))))
+ {
+ if (scratch && GET_MODE (scratch) == mode)
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ else if (can_create_pseudo_p ())
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ return comparison;
+}
+
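+/* Emit the RTL for an SImode conditional branch: compute the comparison of
+ operands[1] and operands[2] into the T bit and emit a branch to operands[3].
+ A REG_BR_PROB note is attached when PROBABILITY is non-negative. */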
+void
+expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
+{
+ rtx (*branch_expander) (rtx) = gen_branch_true;
+ comparison = prepare_cbranch_operands (operands, SImode, comparison);
+ switch (comparison)
+ {
+ case NE: case LT: case LE: case LTU: case LEU:
+ comparison = reverse_condition (comparison);
+ branch_expander = gen_branch_false;
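+ /* Fall through. */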
+ default: ;
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
+ gen_rtx_fmt_ee (comparison, SImode,
+ operands[1], operands[2])));
+ rtx jump = emit_jump_insn (branch_expander (operands[3]));
+ if (probability >= 0)
+ add_int_reg_note (jump, REG_BR_PROB, probability);
+}
+
+/* ??? How should we distribute probabilities when more than one branch
+ is generated? So far we only have some ad-hoc observations:
+ - If the operands are random, they are likely to differ in both parts.
+ - If comparing items in a hash chain, the operands are random or equal;
+ operation should be EQ or NE.
+ - If items are searched in an ordered tree from the root, we can expect
+ the highpart to be unequal about half of the time; operation should be
+ an inequality comparison, operands non-constant, and overall probability
+ about 50%. Likewise for quicksort.
+ - Range checks will be often made against constants. Even if we assume for
+ simplicity an even distribution of the non-constant operand over a
+ sub-range here, the same probability could be generated with differently
+ wide sub-ranges - as long as the ratio of the part of the subrange that
+ is before the threshold to the part that comes after the threshold stays
+ the same. Thus, we can't really tell anything here;
+ assuming random distribution is at least simple.
+ */
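+/* Emit the RTL for a DImode conditional branch by comparing the high and low
+ SImode parts separately. Returns true if a branch sequence was emitted and
+ false if the comparison code is not handled here. */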
+bool
+expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
+{
+ enum rtx_code msw_taken, msw_skip, lsw_taken;
+ rtx skip_label = NULL_RTX;
+ rtx op1h, op1l, op2h, op2l;
+ int num_branches;
+ int prob, rev_prob;
+ int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
+ rtx scratch = operands[4];
+
+ comparison = prepare_cbranch_operands (operands, DImode, comparison);
+ op1h = gen_highpart_mode (SImode, DImode, operands[1]);
+ op2h = gen_highpart_mode (SImode, DImode, operands[2]);
+ op1l = gen_lowpart (SImode, operands[1]);
+ op2l = gen_lowpart (SImode, operands[2]);
+ msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
+ prob = split_branch_probability;
+ rev_prob = REG_BR_PROB_BASE - prob;
+ switch (comparison)
+ {
+ /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
+ That costs 1 cycle more when the first branch can be predicted taken,
+ but saves us mispredicts because only one branch needs prediction.
+ It also enables generating the cmpeqdi_t-1 pattern. */
+ case EQ:
+ if (TARGET_CMPEQDI_T)
+ {
+ emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
+ emit_jump_insn (gen_branch_true (operands[3]));
+ return true;
+ }
+ msw_skip = NE;
+ lsw_taken = EQ;
+ if (prob >= 0)
+ {
+ // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
+ msw_skip_prob = rev_prob;
+ if (REG_BR_PROB_BASE <= 65535)
+ lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
+ else
+ {
+ gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
+ lsw_taken_prob
+ = (prob
+ ? (REG_BR_PROB_BASE
+ - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
+ / ((HOST_WIDEST_INT) prob << 32)))
+ : 0);
+ }
+ }
+ break;
+ case NE:
+ if (TARGET_CMPEQDI_T)
+ {
+ emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
+ emit_jump_insn (gen_branch_false (operands[3]));
+ return true;
+ }
+ msw_taken = NE;
+ msw_taken_prob = prob;
+ lsw_taken = NE;
+ lsw_taken_prob = 0;
+ break;
+ case GTU: case GT:
+ msw_taken = comparison;
+ if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
+ break;
+ if (comparison != GTU || op2h != CONST0_RTX (SImode))
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = GTU;
+ break;
+ case GEU: case GE:
+ if (op2l == CONST0_RTX (SImode))
+ msw_taken = comparison;
+ else
+ {
+ msw_taken = comparison == GE ? GT : GTU;
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = GEU;
+ }
+ break;
+ case LTU: case LT:
+ msw_taken = comparison;
+ if (op2l == CONST0_RTX (SImode))
+ break;
+ msw_skip = swap_condition (msw_taken);
+ lsw_taken = LTU;
+ break;
+ case LEU: case LE:
+ if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
+ msw_taken = comparison;
+ else
+ {
+ lsw_taken = LEU;
+ if (comparison == LE)
+ msw_taken = LT;
+ else if (op2h != CONST0_RTX (SImode))
+ msw_taken = LTU;
+ else
+ {
+ msw_skip = swap_condition (LTU);
+ break;
+ }
+ msw_skip = swap_condition (msw_taken);
+ }
+ break;
+ default: return false;
+ }
+ num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
+ + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
+ + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
+ if (comparison != EQ && comparison != NE && num_branches > 1)
+ {
+ if (!CONSTANT_P (operands[2])
+ && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
+ && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
+ {
+ msw_taken_prob = prob / 2U;
+ msw_skip_prob
+ = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
+ lsw_taken_prob = prob;
+ }
+ else
+ {
+ msw_taken_prob = prob;
+ msw_skip_prob = REG_BR_PROB_BASE;
+ /* ??? If we have a constant op2h, should we use that when
+ calculating lsw_taken_prob? */
+ lsw_taken_prob = prob;
+ }
+ }
+ operands[1] = op1h;
+ operands[2] = op2h;
+ operands[4] = NULL_RTX;
+ if (reload_completed
+ && ! arith_reg_or_0_operand (op2h, SImode)
+ && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
+ && (msw_taken != LAST_AND_UNUSED_RTX_CODE
+ || msw_skip != LAST_AND_UNUSED_RTX_CODE))
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
+ expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
+ if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
+ {
+ rtx taken_label = operands[3];
+
+ /* Operands were possibly modified, but msw_skip doesn't expect this.
+ Always use the original ones. */
+ if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
+ {
+ operands[1] = op1h;
+ operands[2] = op2h;
+ if (reload_completed
+ && ! arith_reg_or_0_operand (op2h, SImode)
+ && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ }
+
+ operands[3] = skip_label = gen_label_rtx ();
+ expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
+ operands[3] = taken_label;
+ }
+ operands[1] = op1l;
+ operands[2] = op2l;
+ if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
+ {
+ if (reload_completed
+ && ! arith_reg_or_0_operand (op2l, SImode)
+ && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
+ {
+ emit_move_insn (scratch, operands[2]);
+ operands[2] = scratch;
+ }
+ expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
+ }
+ if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
+ emit_label (skip_label);
+ return true;
+}
+
+/* Given an operand, return 1 if the evaluated operand plugged into an
+ if_then_else will result in a branch_true, 0 if branch_false, or
+ -1 if neither applies. The truth table goes like this:
+
+ op | cmpval | code | result
+ ---------+--------+---------+--------------------
+ T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
+ T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
+ T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
+ T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
+ !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
+ !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
+ !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
+ !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
+int
+sh_eval_treg_value (rtx op)
+{
+ enum rtx_code code = GET_CODE (op);
+ if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
+ return -1;
+
+ int cmpop = code == EQ ? 1 : 0;
+ int cmpval = INTVAL (XEXP (op, 1));
+ if (cmpval != 0 && cmpval != 1)
+ return -1;
+
+ int t;
+ if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
+ t = 0;
+ else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
+ t = 1;
+ else
+ return -1;
+
+ return t ^ (cmpval == cmpop);
+}
+
+/* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
+
+static void
+sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
+{
+ if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ insn = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, insn,
+ gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
+ (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
+ }
+ else
+ emit_insn (insn);
+}
+
+/* Prepare the operands for an scc instruction; make sure that the
+ compare has been done and the result is in T_REG. */
+void
+sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx t_reg = get_t_reg_rtx ();
+ enum rtx_code oldcode = code;
+ enum machine_mode mode;
+
+ /* First need a compare insn. */
+ switch (code)
+ {
+ case NE:
+ /* It isn't possible to handle this case. */
+ gcc_unreachable ();
+ case LT:
+ code = GT;
+ break;
+ case LE:
+ code = GE;
+ break;
+ case LTU:
+ code = GTU;
+ break;
+ case LEU:
+ code = GEU;
+ break;
+ default:
+ break;
+ }
+ if (code != oldcode)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ }
+
+ mode = GET_MODE (op0);
+ if (mode == VOIDmode)
+ mode = GET_MODE (op1);
+
+ op0 = force_reg (mode, op0);
+ if ((code != EQ && code != NE
+ && (op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || (mode == DImode && op1 != const0_rtx)
+ || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ op1 = force_reg (mode, op1);
+
+ sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
+ gen_rtx_fmt_ee (code, SImode, op0, op1)),
+ mode);
+}
+
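+/* For SHmedia, emit a store-flag sequence that computes comparison CODE of
+ OP0 and OP1 into a new register, reversing the condition when only the
+ inverse is directly available, and return an EQ/NE test of that register
+ against zero. Returns NULL_RTX for comparisons that have no cheap
+ sequence. */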
+rtx
+sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
+ rtx op0, rtx op1)
+{
+ rtx target = gen_reg_rtx (SImode);
+ rtx tmp;
+
+ gcc_assert (TARGET_SHMEDIA);
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case LT:
+ case UNORDERED:
+ case GTU:
+ case LTU:
+ tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
+ emit_insn (gen_cstore4_media (target, tmp, op0, op1));
+ code = NE;
+ break;
+
+ case NE:
+ case GE:
+ case LE:
+ case ORDERED:
+ case GEU:
+ case LEU:
+ tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
+ emit_insn (gen_cstore4_media (target, tmp, op0, op1));
+ code = EQ;
+ break;
+
+ case UNEQ:
+ case UNGE:
+ case UNGT:
+ case UNLE:
+ case UNLT:
+ case LTGT:
+ return NULL_RTX;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (mode == DImode)
+ {
+ rtx t2 = gen_reg_rtx (DImode);
+ emit_insn (gen_extendsidi2 (t2, target));
+ target = t2;
+ }
+
+ return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
+}
+
+/* Called from the md file, set up the operands of a compare instruction. */
+void
+sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (operands[0]);
+ enum rtx_code branch_code;
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx insn, tem;
+ bool need_ccmpeq = false;
+
+ if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ op0 = force_reg (mode, op0);
+ op1 = force_reg (mode, op1);
+ }
+ else
+ {
+ if (code != EQ || mode == DImode)
+ {
+ /* Force args into regs, since we can't use constants here. */
+ op0 = force_reg (mode, op0);
+ if (op1 != const0_rtx || code == GTU || code == GEU)
+ op1 = force_reg (mode, op1);
+ }
+ }
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (code == LT
+ || (code == LE && TARGET_IEEE && TARGET_SH2E)
+ || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
+ {
+ tem = op0, op0 = op1, op1 = tem;
+ code = swap_condition (code);
+ }
+
+ /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
+ if (code == GE)
+ {
+ gcc_assert (TARGET_IEEE && TARGET_SH2E);
+ need_ccmpeq = true;
+ code = GT;
+ }
+
+ /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
+ to EQ/GT respectively. */
+ gcc_assert (code == EQ || code == GT || code == NE || code == LE);
+ }
+
+ switch (code)
+ {
+ case EQ:
+ case GT:
+ case GE:
+ case GTU:
+ case GEU:
+ branch_code = code;
+ break;
+ case NE:
+ case LT:
+ case LE:
+ case LTU:
+ case LEU:
+ branch_code = reverse_condition (code);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ insn = gen_rtx_SET (VOIDmode,
+ get_t_reg_rtx (),
+ gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
+
+ sh_emit_set_t_insn (insn, mode);
+ if (need_ccmpeq)
+ sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
+
+ if (branch_code == code)
+ emit_jump_insn (gen_branch_true (operands[3]));
+ else
+ emit_jump_insn (gen_branch_false (operands[3]));
+}
+
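+/* Called from the md file to expand a cstore pattern: compute the comparison
+ given by operands[1] applied to operands[2] and operands[3] into the T bit
+ and copy it (negated if necessary) into operands[0]. */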
+void
+sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+ rtx lab = NULL_RTX;
+ bool invert = false;
+ rtx tem;
+
+ op0 = force_reg (mode, op0);
+ if ((code != EQ && code != NE
+ && (op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || (mode == DImode && op1 != const0_rtx)
+ || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ op1 = force_reg (mode, op1);
+
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ if (code == LT || code == LE)
+ {
+ code = swap_condition (code);
+ tem = op0, op0 = op1, op1 = tem;
+ }
+ if (code == GE)
+ {
+ if (TARGET_IEEE)
+ {
+ lab = gen_label_rtx ();
+ sh_emit_scc_to_t (EQ, op0, op1);
+ emit_jump_insn (gen_branch_true (lab));
+ code = GT;
+ }
+ else
+ {
+ code = LT;
+ invert = true;
+ }
+ }
+ }
+
+ if (code == NE)
+ {
+ code = EQ;
+ invert = true;
+ }
+
+ sh_emit_scc_to_t (code, op0, op1);
+ if (lab)
+ emit_label (lab);
+ if (invert)
+ emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
+ else
+ emit_move_insn (operands[0], get_t_reg_rtx ());
+}
+
+/* Functions to output assembly code. */
+
+/* Return a sequence of instructions to perform DI or DF move.
+
+ Since the SH cannot move a DI or DF in one instruction, we have
+ to take care when we see overlapping source and dest registers. */
+const char *
+output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
+ enum machine_mode mode)
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ if (MEM_P (dst)
+ && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
+ return "mov.l %T1,%0" "\n"
+ " mov.l %1,%0";
+
+ if (register_operand (dst, mode)
+ && register_operand (src, mode))
+ {
+ if (REGNO (src) == MACH_REG)
+ return "sts mach,%S0" "\n"
+ " sts macl,%R0";
+
+ /* When mov.d r1,r2 do r2->r3 then r1->r2;
+ when mov.d r1,r0 do r1->r0 then r2->r1. */
+ if (REGNO (src) + 1 == REGNO (dst))
+ return "mov %T1,%T0" "\n"
+ " mov %1,%0";
+ else
+ return "mov %1,%0" "\n"
+ " mov %T1,%T0";
+ }
+ else if (CONST_INT_P (src))
+ {
+ if (INTVAL (src) < 0)
+ output_asm_insn ("mov #-1,%S0", operands);
+ else
+ output_asm_insn ("mov #0,%S0", operands);
+
+ return "mov %1,%R0";
+ }
+ else if (MEM_P (src))
+ {
+ int ptrreg = -1;
+ int dreg = REGNO (dst);
+ rtx inside = XEXP (src, 0);
+
+ switch (GET_CODE (inside))
+ {
+ case REG:
+ ptrreg = REGNO (inside);
+ break;
+
+ case SUBREG:
+ ptrreg = subreg_regno (inside);
+ break;
+
+ case PLUS:
+ ptrreg = REGNO (XEXP (inside, 0));
+ /* ??? A r0+REG address shouldn't be possible here, because it isn't
+ an offsettable address. Unfortunately, offsettable addresses use
+ QImode to check the offset, and a QImode offsettable address
+ requires r0 for the other operand, which is not currently
+ supported, so we can't use the 'o' constraint.
+ Thus we must check for and handle r0+REG addresses here.
+ We punt for now, since this is likely very rare. */
+ gcc_assert (!REG_P (XEXP (inside, 1)));
+ break;
+
+ case LABEL_REF:
+ return "mov.l %1,%0" "\n"
+ " mov.l %1+4,%T0";
+ case POST_INC:
+ return "mov.l %1,%0" "\n"
+ " mov.l %1,%T0";
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Work out the safe way to copy. Copy into the second half first. */
+ if (dreg == ptrreg)
+ return "mov.l %T1,%T0" "\n"
+ " mov.l %1,%0";
+ }
+
+ return "mov.l %1,%0" "\n"
+ " mov.l %T1,%T0";
+}
+
+/* Print an instruction which would have gone into a delay slot after
+ another instruction, but couldn't because the other instruction expanded
+ into a sequence where putting the slot insn at the end wouldn't work. */
+static void
+print_slot (rtx insn)
+{
+ final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
+
+ INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
+}
+
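+/* Output assembler code for a jump whose target is out of range of a
+ conditional branch. The destination is emitted as an inline constant after
+ the jump and reached via braf (PIC) or jmp through a scratch register; when
+ no scratch register is provided by a preceding indirect_jump_scratch insn,
+ r13 is saved and restored around the sequence. */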
+const char *
+output_far_jump (rtx insn, rtx op)
+{
+ struct { rtx lab, reg, op; } this_jmp;
+ rtx braf_base_lab = NULL_RTX;
+ const char *jump;
+ int far;
+ int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+ rtx prev;
+
+ this_jmp.lab = gen_label_rtx ();
+
+ if (TARGET_SH2
+ && offset >= -32764
+ && offset - get_attr_length (insn) <= 32766)
+ {
+ far = 0;
+ jump = "mov.w %O0,%1" "\n"
+ " braf %1";
+ }
+ else
+ {
+ far = 1;
+ if (flag_pic)
+ {
+ if (TARGET_SH2)
+ jump = "mov.l %O0,%1" "\n"
+ " braf %1";
+ else
+ jump = "mov.l r0,@-r15" "\n"
+ " mova %O0,r0" "\n"
+ " mov.l @r0,%1" "\n"
+ " add r0,%1" "\n"
+ " mov.l @r15+,r0" "\n"
+ " jmp @%1";
+ }
+ else
+ jump = "mov.l %O0,%1" "\n"
+ " jmp @%1";
+ }
+ /* If we have a scratch register available, use it. */
+ if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
+ && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ {
+ this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
+ if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
+ jump = "mov.l r1,@-r15" "\n"
+ " mova %O0,r0" "\n"
+ " mov.l @r0,r1" "\n"
+ " add r1,r0" "\n"
+ " mov.l @r15+,r1" "\n"
+ " jmp @%1";
+ output_asm_insn (jump, &this_jmp.lab);
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+ else
+ output_asm_insn ("nop", 0);
+ }
+ else
+ {
+ /* Output the delay slot insn first if any. */
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+
+ this_jmp.reg = gen_rtx_REG (SImode, 13);
+ /* We must keep the stack aligned to 8-byte boundaries on SH5.
+ Fortunately, MACL is fixed and call-clobbered, and we never
+ need its value across jumps, so save r13 in it instead of in
+ the stack. */
+ if (TARGET_SH5)
+ output_asm_insn ("lds r13,macl", 0);
+ else
+ output_asm_insn ("mov.l r13,@-r15", 0);
+ output_asm_insn (jump, &this_jmp.lab);
+ if (TARGET_SH5)
+ output_asm_insn ("sts macl,r13", 0);
+ else
+ output_asm_insn ("mov.l @r15+,r13", 0);
+ }
+ if (far && flag_pic && TARGET_SH2)
+ {
+ braf_base_lab = gen_label_rtx ();
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (braf_base_lab));
+ }
+ if (far)
+ output_asm_insn (".align 2", 0);
+ (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
+ this_jmp.op = op;
+ if (far && flag_pic)
+ {
+ if (TARGET_SH2)
+ this_jmp.lab = braf_base_lab;
+ output_asm_insn (".long %O2-%O0", &this_jmp.lab);
+ }
+ else
+ output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
+ return "";
+}
+
+/* Local label counter, used for constants in the pool and inside
+ pattern branches. */
+static int lf = 100;
+
+/* Output code for ordinary branches. */
+const char *
+output_branch (int logic, rtx insn, rtx *operands)
+{
+ switch (get_attr_length (insn))
+ {
+ case 6:
+ /* This can happen if filling the delay slot has caused a forward
+ branch to exceed its range (we could reverse it, but only
+ when we know we won't overextend other branches; this should
+ best be handled by relaxation).
+ It can also happen when other condbranches hoist delay slot insns
+ from their destination, thus leading to code size increase.
+ But the branch will still be in the range -4092..+4098 bytes. */
+ if (! TARGET_RELAX)
+ {
+ int label = lf++;
+ /* The call to print_slot will clobber the operands. */
+ rtx op0 = operands[0];
+
+ /* If the instruction in the delay slot is annulled (true), then
+ there is no delay slot where we can put it now. The only safe
+ place for it is after the label. final will do that by default. */
+
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
+ && get_attr_length (XVECEXP (final_sequence, 0, 1)))
+ {
+ asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
+ print_slot (final_sequence);
+ }
+ else
+ asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
+
+ output_asm_insn ("bra\t%l0", &op0);
+ fprintf (asm_out_file, "\tnop\n");
+ (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
+
+ return "";
+ }
+ /* When relaxing, handle this like a short branch. The linker
+ will fix it up if it still doesn't fit after relaxation. */
+ case 2:
+ return logic ? "bt%.\t%l0" : "bf%.\t%l0";
+
+ /* These are for SH2e, in which we have to account for the
+ extra nop because of the hardware bug in annulled branches. */
+ case 8:
+ if (! TARGET_RELAX)
+ {
+ int label = lf++;
+
+ gcc_assert (!final_sequence
+ || !(INSN_ANNULLED_BRANCH_P
+ (XVECEXP (final_sequence, 0, 0))));
+ asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
+ logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
+ fprintf (asm_out_file, "\tnop\n");
+ output_asm_insn ("bra\t%l0", operands);
+ fprintf (asm_out_file, "\tnop\n");
+ (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
+
+ return "";
+ }
+ /* When relaxing, fall through. */
+ case 4:
+ {
+ char buffer[10];
+
+ sprintf (buffer, "b%s%ss\t%%l0",
+ logic ? "t" : "f",
+ ASSEMBLER_DIALECT ? "/" : ".");
+ output_asm_insn (buffer, &operands[0]);
+ return "nop";
+ }
+
+ default:
+ /* There should be no longer branches now - that would
+ indicate that something has destroyed the branches set
+ up in machine_dependent_reorg. */
+ gcc_unreachable ();
+ }
+}
+
+/* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
+ fill in operands[9] as a label to the successor insn.
+ We try to use jump threading where possible.
+ If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
+ we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
+ follow jmp and bt, if the address is in range. */
+const char *
+output_branchy_insn (enum rtx_code code, const char *templ,
+ rtx insn, rtx *operands)
+{
+ rtx next_insn = NEXT_INSN (insn);
+
+ if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
+ {
+ rtx src = SET_SRC (PATTERN (next_insn));
+ if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
+ {
+ /* Following branch not taken */
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], next_insn);
+ INSN_ADDRESSES_NEW (operands[9],
+ INSN_ADDRESSES (INSN_UID (next_insn))
+ + get_attr_length (next_insn));
+ return templ;
+ }
+ else
+ {
+ int offset = (branch_dest (next_insn)
+ - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
+ if (offset >= -252 && offset <= 258)
+ {
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ /* branch_true */
+ src = XEXP (src, 1);
+ operands[9] = src;
+ return templ;
+ }
+ }
+ }
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], insn);
+ INSN_ADDRESSES_NEW (operands[9],
+ INSN_ADDRESSES (INSN_UID (insn))
+ + get_attr_length (insn));
+ return templ;
+}
+
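+/* Output an IEEE ccmpeq sequence: a bt that skips the second fcmp/eq when
+ the T bit has already been set by the preceding comparison (see
+ output_branchy_insn for the label handling). */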
+const char *
+output_ieee_ccmpeq (rtx insn, rtx *operands)
+{
+ return output_branchy_insn (NE, "bt %l9" "\n"
+ " fcmp/eq %1,%0",
+ insn, operands);
+}
+
+/* Output the start of the assembler file. */
+static void
+sh_file_start (void)
+{
+ default_file_start ();
+
+ if (TARGET_ELF)
+ /* We need to show the text section with the proper
+ attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
+ emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
+ will complain. We can teach GAS specifically about the
+ default attributes for our choice of text section, but
+ then we would have to change GAS again if/when we change
+ the text section name. */
+ fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
+ else
+ /* Switch to the data section so that the coffsem symbol
+ isn't in the text section. */
+ switch_to_section (data_section);
+
+ if (TARGET_LITTLE_ENDIAN)
+ fputs ("\t.little\n", asm_out_file);
+
+ if (!TARGET_ELF)
+ {
+ if (TARGET_SHCOMPACT)
+ fputs ("\t.mode\tSHcompact\n", asm_out_file);
+ else if (TARGET_SHMEDIA)
+ fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
+ TARGET_SHMEDIA64 ? 64 : 32);
+ }
+}
+
+/* Check if PAT includes UNSPEC_CALLER unspec pattern. */
+static bool
+unspec_caller_rtx_p (rtx pat)
+{
+ rtx base, offset;
+ int i;
+
+ split_const (pat, &base, &offset);
+ if (GET_CODE (base) == UNSPEC)
+ {
+ if (XINT (base, 1) == UNSPEC_CALLER)
+ return true;
+ for (i = 0; i < XVECLEN (base, 0); i++)
+ if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
+ return true;
+ }
+ return false;
+}
+
+/* Indicate that INSN cannot be duplicated. This is true for an insn
+ that generates a unique label. */
+static bool
+sh_cannot_copy_insn_p (rtx insn)
+{
+ rtx pat;
+
+ if (!reload_completed || !flag_pic)
+ return false;
+
+ if (!NONJUMP_INSN_P (insn))
+ return false;
+ if (asm_noperands (insn) >= 0)
+ return false;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) != SET)
+ return false;
+ pat = SET_SRC (pat);
+
+ if (unspec_caller_rtx_p (pat))
+ return true;
+
+ return false;
+}
+
+/* Number of instructions used to make an arithmetic right shift by N. */
+static const char ashiftrt_insns[] =
+ { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
+
+/* Description of a logical left or right shift, when expanded to a sequence
+ of 1/2/8/16 shifts.
+ Notice that one bit right shifts clobber the T bit. One bit left shifts
+ are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
+enum
+{
+ ASHL_CLOBBERS_T = 1 << 0,
+ LSHR_CLOBBERS_T = 1 << 1
+};
+
+struct ashl_lshr_sequence
+{
+ char insn_count;
+ char amount[6];
+ char clobbers_t;
+};
+
+static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
+{
+ { 0, { 0 }, 0 }, // 0
+ { 1, { 1 }, LSHR_CLOBBERS_T },
+ { 1, { 2 }, 0 },
+ { 2, { 2, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 2, 2 }, 0 }, // 4
+ { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 2, 2, 2 }, 0 },
+ { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 1, { 8 }, 0 }, // 8
+ { 2, { 8, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 8, 2 }, 0 },
+ { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 8, 2, 2 }, 0 }, // 12
+ { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 8, -2, 8 }, 0 },
+ { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
+ { 1, { 16 }, 0 }, // 16
+ { 2, { 16, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 16, 2 }, 0 },
+ { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 2, 2 }, 0 }, // 20
+ { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, -2, 8 }, 0 },
+ { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
+ { 2, { 16, 8 }, 0 }, // 24
+ { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 8, 2 }, 0 },
+ { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 4, { 16, 8, 2, 2 }, 0 }, // 28
+ { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
+ { 3, { 16, -2, 16 }, 0 },
+
+ /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
+ For a left shift by 31 a 2 insn and-rotl sequence can be used.
+ However, the shift-and combiner code needs this entry here to be in
+ terms of real shift insns. */
+ { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
+};
+
+/* Individual shift amounts for shift amounts < 16, where up to three of the
+ highmost bits might be clobbered. This is typically used when combined
+ with some kind of sign or zero extension. */
+static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
+{
+ { 0, { 0 }, 0 }, // 0
+ { 1, { 1 }, LSHR_CLOBBERS_T },
+ { 1, { 2 }, 0 },
+ { 2, { 2, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 2, 2 }, 0 }, // 4
+ { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 2, { 8, -2 }, 0 },
+ { 2, { 8, -1 }, ASHL_CLOBBERS_T },
+ { 1, { 8 }, 0 }, // 8
+ { 2, { 8, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 8, 2 }, 0 },
+ { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 8, 2, 2 }, 0 }, // 12
+ { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
+ { 2, { 16, -2 }, 0 },
+ { 2, { 16, -1 }, ASHL_CLOBBERS_T },
+ { 1, { 16 }, 0 }, // 16
+ { 2, { 16, 1 }, LSHR_CLOBBERS_T },
+ { 2, { 16, 2 }, 0 },
+ { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 2, 2 }, 0 }, // 20
+ { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
+ { 3, { 16, -2, 8 }, 0 },
+ { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
+ { 2, { 16, 8 }, 0 }, // 24
+ { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
+ { 3, { 16, 8, 2 }, 0 },
+ { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
+ { 4, { 16, 8, 2, 2 }, 0 }, // 28
+ { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
+ { 3, { 16, -2, 16 }, 0 },
+ { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
+};
+
+/* Return true if a shift left consisting of 1/2/8/16 shift instructions
+ will clobber the T bit. */
+bool
+sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
+{
+ gcc_assert (CONST_INT_P (shift_amount));
+
+ const int shift_amount_i = INTVAL (shift_amount) & 31;
+
+ /* Special case for shift count of 31: use and-rotl sequence. */
+ if (shift_amount_i == 31)
+ return true;
+
+ return (ashl_lshr_seq[shift_amount_i].clobbers_t
+ & ASHL_CLOBBERS_T) != 0;
+}
+
+/* Return true if a logical right shift consisting of 1/2/8/16 shift
+ instructions will clobber the T bit. */
+bool
+sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
+{
+ gcc_assert (CONST_INT_P (shift_amount));
+
+ const int shift_amount_i = INTVAL (shift_amount) & 31;
+
+ /* Special case for shift count of 31: use shll-movt sequence. */
+ if (shift_amount_i == 31)
+ return true;
+
+ return (ashl_lshr_seq[shift_amount_i].clobbers_t
+ & LSHR_CLOBBERS_T) != 0;
+}
+
+/* Return true if it is potentially beneficial to use a dynamic shift
+ instruction (shad / shar) instead of a combination of 1/2/8/16
+ shift instructions for the specified shift count.
+ If dynamic shifts are not available, always return false. */
+bool
+sh_dynamicalize_shift_p (rtx count)
+{
+ gcc_assert (CONST_INT_P (count));
+
+ const int shift_amount_i = INTVAL (count) & 31;
+ int insn_count;
+
+ /* For left and right shifts, there are shorter 2 insn sequences for
+ shift amounts of 31. */
+ if (shift_amount_i == 31)
+ insn_count = 2;
+ else
+ insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
+
+ return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
+}
+
+/* Assuming we have a value that has been sign-extended by at least one bit,
+ can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
+ arithmetic shift to shift it by N without data loss, and quicker than by
+ other means? */
+#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
+
+/* Return the cost of a shift. */
+static inline int
+shiftcosts (rtx x)
+{
+ int value;
+
+ if (TARGET_SHMEDIA)
+ return 1;
+
+ if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
+ {
+ if (GET_MODE (x) == DImode
+ && CONST_INT_P (XEXP (x, 1))
+ && INTVAL (XEXP (x, 1)) == 1)
+ return 2;
+
+ /* Everything else is invalid, because there is no pattern for it. */
+ return -1;
+ }
+ /* If shifting by a non-constant, then this will be expensive. */
+ if (!CONST_INT_P (XEXP (x, 1)))
+ return SH_DYNAMIC_SHIFT_COST;
+
+ /* Otherwise, return the true cost in instructions. Cope with out of range
+ shift counts more or less arbitrarily. */
+ value = INTVAL (XEXP (x, 1)) & 31;
+
+ if (GET_CODE (x) == ASHIFTRT)
+ {
+ int cost = ashiftrt_insns[value];
+ /* If dynamic shifts are available and profitable in this case, then we
+ put the constant in a reg and use shad. */
+ if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
+ cost = 1 + SH_DYNAMIC_SHIFT_COST;
+ return cost;
+ }
+ else
+ return ashl_lshr_seq[value].insn_count;
+}
+
+/* Return the cost of an AND/XOR/IOR operation. */
+static inline int
+and_xor_ior_costs (rtx x, int code)
+{
+ /* On SH1-4 we have only max. SImode operations.
+ Double the cost for modes > SImode. */
+ const int cost_scale = !TARGET_SHMEDIA
+ && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
+ ? 2 : 1;
+
+ /* A logical operation with two registers is a single cycle
+ instruction. */
+ if (!CONST_INT_P (XEXP (x, 1)))
+ return 1 * cost_scale;
+
+ int i = INTVAL (XEXP (x, 1));
+
+ if (TARGET_SHMEDIA)
+ {
+ if (satisfies_constraint_I10 (XEXP (x, 1))
+ || satisfies_constraint_J16 (XEXP (x, 1)))
+ return 1;
+ else
+ return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
+ }
+
+ /* These constants are single cycle extu.[bw] instructions. */
+ if ((i == 0xff || i == 0xffff) && code == AND)
+ return 1 * cost_scale;
+ /* Constants that can be used in an instruction as an immediate are
+ a single cycle, but this requires r0, so make it a little more
+ expensive. */
+ if (CONST_OK_FOR_K08 (i))
+ return 2 * cost_scale;
+ /* Constants that can be loaded with a mov immediate need one more cycle.
+ This case is probably unnecessary. */
+ if (CONST_OK_FOR_I08 (i))
+ return 2 * cost_scale;
+ /* Any other constant requires an additional 2 cycle pc-relative load.
+ This case is probably unnecessary. */
+ return 3 * cost_scale;
+}
+
+/* Return the cost of an addition or a subtraction. */
+static inline int
+addsubcosts (rtx x)
+{
+ if (GET_MODE (x) == SImode)
+ {
+ /* The addc or subc patterns will eventually become one or two
+ instructions. Below are some costs for some of the patterns
+ which combine would reject because the costs of the individual
+ insns in the patterns are lower.
+
+ FIXME: It would be much easier if we had something like insn cost
+ attributes and the cost calculation machinery used those attributes
+ in the first place. This would eliminate redundant recog-like C
+ code to calculate costs of complex patterns. */
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (GET_CODE (op0) == AND
+ && XEXP (op0, 1) == const1_rtx
+ && (GET_CODE (op1) == PLUS
+ || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
+ return 1;
+
+ if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
+ && GET_CODE (op1) == LSHIFTRT
+ && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
+ return 1;
+ }
+ }
+
+ /* On SH1-4 we have only max. SImode operations.
+ Double the cost for modes > SImode. */
+ const int cost_scale = !TARGET_SHMEDIA
+ && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
+ ? 2 : 1;
+
+ /* Adding a register is a single cycle insn. */
+ if (REG_P (XEXP (x, 1))
+ || GET_CODE (XEXP (x, 1)) == SUBREG)
+ return 1 * cost_scale;
+
+ /* Likewise for small constants. */
+ if (CONST_INT_P (XEXP (x, 1))
+ && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
+ return 1 * cost_scale;
+
+ if (TARGET_SHMEDIA)
+ switch (GET_CODE (XEXP (x, 1)))
+ {
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return TARGET_SHMEDIA64 ? 5 : 3;
+
+ case CONST_INT:
+ if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
+ return 2;
+ else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
+ return 3;
+ else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
+ return 4;
+
+ /* Fall through. */
+ default:
+ return 5;
+ }
+
+ /* Any other constant requires a 2 cycle pc-relative load plus an
+ addition. */
+ return 3 * cost_scale;
+}
+
+/* Return the cost of a multiply. */
+static inline int
+multcosts (rtx x ATTRIBUTE_UNUSED)
+{
+ if (sh_multcost >= 0)
+ return sh_multcost;
+ if (TARGET_SHMEDIA)
+ /* ??? We have a mul insn, but it has a latency of three, and doesn't
+ accept constants. Ideally, we would use a cost of one or two and
+ add the cost of the operand, but disregard the latter when inside loops
+ and loop invariant code motion is still to follow.
+ Using a multiply first and splitting it later if it's a loss
+ doesn't work because of different sign / zero extension semantics
+ of multiplies vs. shifts. */
+ return optimize_size ? 2 : 3;
+
+ if (TARGET_SH2)
+ {
+ /* We have a mul insn, so we can never take more than the mul and the
+ read of the mac reg, but count more because of the latency and extra
+ reg usage. */
+ if (optimize_size)
+ return 2;
+ return 3;
+ }
+
+ /* If we're aiming at small code, then just count the number of
+ insns in a multiply call sequence. */
+ if (optimize_size)
+ return 5;
+
+ /* Otherwise count all the insns in the routine we'd be calling too. */
+ return 20;
+}
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+static bool
+sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
+ int *total, bool speed ATTRIBUTE_UNUSED)
+{
+ switch (code)
+ {
+ /* The lower-subreg pass decides whether to split multi-word regs
+ into individual regs by looking at the cost for a SET of certain
+ modes with the following patterns:
+ (set (reg) (reg))
+ (set (reg) (const_int 0))
+ On machines that support vector-move operations a multi-word move
+ is the same cost as individual reg move. On SH there is no
+ vector-move, so we have to provide the correct cost in the number
+ of move insns to load/store the reg of the mode in question. */
+ case SET:
+ if (register_operand (SET_DEST (x), VOIDmode)
+ && (register_operand (SET_SRC (x), VOIDmode)
+ || satisfies_constraint_Z (SET_SRC (x))))
+ {
+ const enum machine_mode mode = GET_MODE (SET_DEST (x));
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
+ / mov_insn_size (mode, TARGET_SH2A));
+ return true;
+ }
+ return false;
+
+ /* The cost of a mem access is mainly the cost of the address mode. */
+ case MEM:
+ *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
+ true);
+ return true;
+
+ /* The cost of a sign or zero extend depends on whether the source is a
+ reg or a mem. In case of a mem, take the address into account.
+ case SIGN_EXTEND:
+ if (REG_P (XEXP (x, 0)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ if (MEM_P (XEXP (x, 0)))
+ {
+ *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
+ GET_MODE (XEXP (x, 0)),
+ MEM_ADDR_SPACE (XEXP (x, 0)), true);
+ return true;
+ }
+ return false;
+
+ case ZERO_EXTEND:
+ if (REG_P (XEXP (x, 0)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
+ && (GET_MODE (XEXP (x, 0)) == QImode
+ || GET_MODE (XEXP (x, 0)) == HImode))
+ {
+ /* Handle SH2A's movu.b and movu.w insn. */
+ *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
+ GET_MODE (XEXP (x, 0)),
+ MEM_ADDR_SPACE (XEXP (x, 0)), true);
+ return true;
+ }
+ return false;
+
+ /* mems for SFmode and DFmode can be inside a parallel due to
+ the way the fpscr is handled. */
+ case PARALLEL:
+ for (int i = 0; i < XVECLEN (x, 0); i++)
+ {
+ rtx xx = XVECEXP (x, 0, i);
+ if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
+ {
+ *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
+ GET_MODE (XEXP (xx, 0)),
+ MEM_ADDR_SPACE (XEXP (xx, 0)), true);
+ return true;
+ }
+ if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
+ {
+ *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
+ GET_MODE (XEXP (xx, 1)),
+ MEM_ADDR_SPACE (XEXP (xx, 1)), true);
+ return true;
+ }
+ }
+
+ if (sh_1el_vec (x, VOIDmode))
+ *total = outer_code != SET;
+ else if (sh_rep_vec (x, VOIDmode))
+ *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+ + (outer_code != SET));
+ else
+ *total = COSTS_N_INSNS (3) + (outer_code != SET);
+ return true;
+
+ case CONST_INT:
+ if (TARGET_SHMEDIA)
+ {
+ if (INTVAL (x) == 0)
+ *total = 0;
+ else if (outer_code == AND && and_operand ((x), DImode))
+ *total = 0;
+ else if ((outer_code == IOR || outer_code == XOR
+ || outer_code == PLUS)
+ && CONST_OK_FOR_I10 (INTVAL (x)))
+ *total = 0;
+ else if (CONST_OK_FOR_I16 (INTVAL (x)))
+ *total = COSTS_N_INSNS (outer_code != SET);
+ else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
+ *total = COSTS_N_INSNS ((outer_code != SET) + 1);
+ else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
+ *total = COSTS_N_INSNS ((outer_code != SET) + 2);
+ else
+ *total = COSTS_N_INSNS ((outer_code != SET) + 3);
+ return true;
+ }
+ if (CONST_OK_FOR_I08 (INTVAL (x)))
+ *total = 0;
+ else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
+ && CONST_OK_FOR_K08 (INTVAL (x)))
+ *total = 1;
+ /* prepare_cmp_insn will force costly constants into registers before
+ the cbranch[sd]i4 patterns can see them, so preserve potentially
+ interesting ones not covered by I08 above. */
+ else if (outer_code == COMPARE
+ && ((unsigned HOST_WIDE_INT) INTVAL (x)
+ == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
+ || INTVAL (x) == 0x7fffffff
+ || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
+ *total = 1;
+ else
+ *total = 8;
+ return true;
+
+ case EQ:
+ /* An and with a constant compared against zero is
+ most likely going to be a TST #imm, R0 instruction.
+ Notice that this does not catch the zero_extract variants from
+ the md file. */
+ if (GET_CODE (XEXP (x, 0)) == AND
+ && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
+ {
+ *total = 1;
+ return true;
+ }
+ else
+ return false;
+
+ case SMIN:
+ case SMAX:
+ /* This is most likely a clips.b or clips.w insn that is being made up
+ by combine. */
+ if (TARGET_SH2A
+ && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && CONST_INT_P (XEXP (x, 1)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ else
+ return false;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_SHMEDIA64)
+ *total = COSTS_N_INSNS (4);
+ else if (TARGET_SHMEDIA32)
+ *total = COSTS_N_INSNS (2);
+ else
+ *total = 5;
+ return true;
+
+ case CONST_DOUBLE:
+ if (TARGET_SHMEDIA)
+ *total = COSTS_N_INSNS (4);
+ /* prepare_cmp_insn will force costly constants into registers before
+ the cbranchdi4 pattern can see them, so preserve potentially
+ interesting ones. */
+ else if (outer_code == COMPARE && GET_MODE (x) == DImode)
+ *total = 1;
+ else
+ *total = 10;
+ return true;
+
+ case CONST_VECTOR:
+ /* FIXME: This looks broken. Only the last statement has any effect.
+ Probably this could be folded with the PARALLEL case? */
+ if (x == CONST0_RTX (GET_MODE (x)))
+ *total = 0;
+ else if (sh_1el_vec (x, VOIDmode))
+ *total = outer_code != SET;
+ if (sh_rep_vec (x, VOIDmode))
+ *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+ + (outer_code != SET));
+ *total = COSTS_N_INSNS (3) + (outer_code != SET);
+ return true;
+
+ case PLUS:
+ case MINUS:
+ *total = COSTS_N_INSNS (addsubcosts (x));
+ return true;
+
+ case AND:
+ case XOR:
+ case IOR:
+ *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
+ return true;
+
+ case MULT:
+ *total = COSTS_N_INSNS (multcosts (x));
+ return true;
+
+ case LT:
+ case GE:
+ /* div0s sign comparison. */
+ if (GET_CODE (XEXP (x, 0)) == XOR
+ && REG_P ((XEXP (XEXP (x, 0), 0)))
+ && REG_P ((XEXP (XEXP (x, 0), 1)))
+ && satisfies_constraint_Z (XEXP (x, 1)))
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ else
+ return false;
+
+ case LSHIFTRT:
+ /* div0s sign comparison. */
+ if (GET_CODE (XEXP (x, 0)) == XOR
+ && REG_P ((XEXP (XEXP (x, 0), 0)))
+ && REG_P ((XEXP (XEXP (x, 0), 1)))
+ && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ /* Fall through to shiftcosts. */
+ case ASHIFT:
+ case ASHIFTRT:
+ {
+ int cost = shiftcosts (x);
+ if (cost < 0)
+ return false;
+ *total = COSTS_N_INSNS (cost);
+ return true;
+ }
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ *total = COSTS_N_INSNS (20);
+ return true;
+
+ case FLOAT:
+ case FIX:
+ *total = 100;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/* Determine the size of the fundamental move insn that will be used
+ for the specified mode. */
+static inline int
+mov_insn_size (enum machine_mode mode, bool consider_sh2a)
+{
+ const int mode_sz = GET_MODE_SIZE (mode);
+
+ if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
+ || (TARGET_FMOVD && mode == DFmode))
+ return mode_sz;
+ else
+ {
+ /* The max. available mode for actual move insns is SImode.
+ Larger accesses will be split into multiple loads/stores. */
+ const int max_mov_sz = GET_MODE_SIZE (SImode);
+ return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
+ }
+}
+
+/* Determine the maximum possible displacement for a move insn for the
+ specified mode. */
+int
+sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
+{
+ /* The 4 byte displacement move insns are the same as the 2 byte
+ versions but take a 12 bit displacement. All we need to do is to
+ scale the max. displacement value accordingly. */
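+ /* For example, SImode moves allow 15 * 4 = 60 bytes with the plain 4 bit
+ displacement and 4095 * 4 = 16380 bytes with the SH2A 12 bit
+ displacement. */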
+ const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
+
+ /* SH2A supports FPU move insns with 12 bit displacements.
+ Other variants do not support any kind of displacements for
+ FPU move insns. */
+ if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ return 0;
+ else
+ {
+ const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
+ const int mode_sz = GET_MODE_SIZE (mode);
+ int r = 15 * mov_insn_sz * disp_scale;
+
+ /* If the mov insn will be split into multiple loads/stores, the
+ maximum possible displacement is a bit smaller. */
+ if (mode_sz > mov_insn_sz)
+ r -= mode_sz - mov_insn_sz;
+ return r;
+ }
+}
+
+/* Determine the alignment mask for a move insn of the
+ specified mode. */
+static inline int
+mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
+{
+ const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
+ return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
+}
+
+/* Return the displacement value of a displacement address. */
+HOST_WIDE_INT
+sh_disp_addr_displacement (rtx x)
+{
+ gcc_assert (satisfies_constraint_Sdd (x));
+ return INTVAL (XEXP (XEXP (x, 0), 1));
+}
+
+/* Compute the cost of an address. */
+static int
+sh_address_cost (rtx x, enum machine_mode mode,
+ addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
+{
+ /* 'GBR + 0'. Account one more because of R0 restriction. */
+ if (REG_P (x) && REGNO (x) == GBR_REG)
+ return 2;
+
+ /* Simple reg, post-inc, pre-dec addressing. */
+ if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
+ return 1;
+
+ /* 'reg + disp' addressing. */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+ {
+ /* 'GBR + disp'. Account one more because of R0 restriction. */
+ if (REGNO (XEXP (x, 0)) == GBR_REG
+ && gbr_displacement (XEXP (x, 1), mode))
+ return 2;
+
+ const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
+
+ if (offset == 0)
+ return 1;
+
+ /* The displacement would fit into a 2 byte move insn.
+ HImode and QImode loads/stores with displacement put pressure on
+ R0 which will most likely require another reg copy. Thus account
+ a higher cost for that. */
+ if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
+ return (mode == HImode || mode == QImode) ? 2 : 1;
+
+ /* The displacement would fit into a 4 byte move insn (SH2A). */
+ if (TARGET_SH2A
+ && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
+ return 2;
+
+ /* The displacement is probably out of range and will require extra
+ calculations. */
+ return 3;
+ }
+
+ /* 'reg + reg' addressing. Account a slightly higher cost because of
+ increased pressure on R0. */
+ if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
+ && ! TARGET_SHMEDIA)
+ return 3;
+
+ /* Not sure what it is - probably expensive. */
+ return 10;
+}
+
+/* Code to expand a shift. */
+static void
+gen_ashift (int type, int n, rtx reg)
+{
+ rtx n_rtx;
+
+ /* Negative values here come from the ashl_lshr_seq / ext_ashl_lshr_seq
+ tables and mean a shift in the opposite direction. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ n_rtx = GEN_INT (n);
+ gcc_assert (satisfies_constraint_P27 (n_rtx));
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
+ break;
+ case LSHIFTRT:
+ if (n == 1)
+ emit_insn (gen_shlr (reg, reg));
+ else
+ emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
+ break;
+ case ASHIFT:
+ emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Code to expand a HImode shift. */
+static void
+gen_ashift_hi (int type, int n, rtx reg)
+{
+ /* Negative values here come from the ashl_lshr_seq / ext_ashl_lshr_seq
+ tables and mean a shift in the opposite direction. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ case LSHIFTRT:
+ /* We don't have HImode right shift operations because using the
+ ordinary 32 bit shift instructions for that doesn't generate proper
+ zero/sign extension.
+ gen_ashift_hi is only called in contexts where we know that the
+ sign extension works out correctly. */
+ {
+ int offset = 0;
+ if (GET_CODE (reg) == SUBREG)
+ {
+ offset = SUBREG_BYTE (reg);
+ reg = SUBREG_REG (reg);
+ }
+ gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
+ break;
+ }
+ case ASHIFT:
+ emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
+ break;
+ }
+}
+
+/* Output RTL to split a constant shift into its component SH constant
+ shift instructions. */
+void
+gen_shifty_op (int code, rtx *operands)
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+
+ /* Truncate the shift count in case it is out of bounds. */
+ value = value & 31;
+
+ if (value == 31)
+ {
+ if (code == LSHIFTRT)
+ {
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
+ return;
+ }
+ else if (code == ASHIFT)
+ {
+ /* There is a two instruction sequence for 31 bit left shifts,
+ but it requires r0. */
+ if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
+ {
+ emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+ return;
+ }
+ }
+ }
+ else if (value == 0)
+ {
+ /* This can happen even when optimizing, if there were subregs before
+ reload. Don't output a nop here, as this is never optimized away;
+ use a no-op move instead. */
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
+ return;
+ }
+
+ max = ashl_lshr_seq[value].insn_count;
+ for (i = 0; i < max; i++)
+ gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
+}
+
+/* Same as gen_shifty_op, but optimized for values where the topmost bits
+ don't matter. */
+void
+gen_shifty_hi_op (int code, rtx *operands)
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+ void (*gen_fun) (int, int, rtx);
+
+ /* This operation is used by and_shl for SImode values with a few
+ high bits known to be cleared. */
+ value &= 31;
+ if (value == 0)
+ {
+ emit_insn (gen_nop ());
+ return;
+ }
+
+ gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
+ if (code == ASHIFT)
+ {
+ max = ext_ashl_lshr_seq[value].insn_count;
+ for (i = 0; i < max; i++)
+ gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
+ }
+ else
+ /* When shifting right, emit the shifts in reverse order, so that
+ solitary negative values come first. */
+ for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
+ gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
+}
+
+/* Output RTL for an arithmetic right shift.
+ ??? Rewrite to use super-optimizer sequences. */
+bool
+expand_ashiftrt (rtx *operands)
+{
+ rtx wrk;
+ char func[18];
+ int value;
+
+ if (TARGET_DYNSHIFT)
+ {
+ if (!CONST_INT_P (operands[2]))
+ {
+ rtx count = copy_to_mode_reg (SImode, operands[2]);
+ emit_insn (gen_negsi2 (count, count));
+ emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
+ return true;
+ }
+ else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
+ > 1 + SH_DYNAMIC_SHIFT_COST)
+ {
+ rtx count
+ = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
+ emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
+ return true;
+ }
+ }
+ if (!CONST_INT_P (operands[2]))
+ return false;
+
+ value = INTVAL (operands[2]) & 31;
+
+ if (value == 31)
+ {
+ /* If we are called from abs expansion, arrange things so that we
+ we can use a single MT instruction that doesn't clobber the source,
+	 can use a single MT instruction that doesn't clobber the source,
+ if (currently_expanding_to_rtl)
+ {
+ emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
+ operands[1]));
+ emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
+ return true;
+ }
+ emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
+ return true;
+ }
+ else if (value >= 16 && value <= 19)
+ {
+ wrk = gen_reg_rtx (SImode);
+ emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
+ value -= 16;
+ while (value--)
+ gen_ashift (ASHIFTRT, 1, wrk);
+ emit_move_insn (operands[0], wrk);
+ return true;
+ }
+  /* Expand a short sequence inline; for a longer one, call a magic routine.  */
+ else if (value <= 5)
+ {
+ wrk = gen_reg_rtx (SImode);
+ emit_move_insn (wrk, operands[1]);
+ while (value--)
+ gen_ashift (ASHIFTRT, 1, wrk);
+ emit_move_insn (operands[0], wrk);
+ return true;
+ }
+
+ wrk = gen_reg_rtx (Pmode);
+
+ /* Load the value into an arg reg and call a helper. */
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ sprintf (func, "__ashiftrt_r4_%d", value);
+ function_symbol (wrk, func, SFUNC_STATIC);
+ emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
+ emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
+ return true;
+}
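+
+/* As the fallback sequence above shows, the __ashiftrt_r4_<n> helpers use
+   the sfunc convention: the value to be shifted is passed in r4, the result
+   comes back in r4, and the helper's address is loaded into a scratch
+   register for the call.  */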
+
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n"))) .
+ LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
+ return 0 for simple right / left or left/right shift combination.
+ return 1 for a combination of shifts with zero_extend.
+ return 2 for a combination of shifts with an AND that needs r0.
+ return 3 for a combination of shifts with an AND that needs an extra
+ scratch register, when the three highmost bits of the AND mask are clear.
+ return 4 for a combination of shifts with an AND that needs an extra
+ scratch register, when any of the three highmost bits of the AND mask
+ is set.
+ If ATTRP is set, store an initial right shift width in ATTRP[0],
+ and the instruction length in ATTRP[1] . These values are not valid
+ when returning 0.
+ When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
+ shift_amounts for the last shift value that is to be used before the
+ sign extend. */
+int
+shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
+{
+ unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
+ int left = INTVAL (left_rtx), right;
+ int best = 0;
+ int cost, best_cost = 10000;
+ int best_right = 0, best_len = 0;
+ int i;
+ int can_ext;
+
+ if (left < 0 || left > 31)
+ return 0;
+ if (CONST_INT_P (mask_rtx))
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
+ else
+ mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
+ /* Can this be expressed as a right shift / left shift pair? */
+ lsb = ((mask ^ (mask - 1)) >> 1) + 1;
+ right = exact_log2 (lsb);
+ mask2 = ~(mask + lsb - 1);
+ lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
+ /* mask has no zeroes but trailing zeroes <==> ! mask2 */
+ if (! mask2)
+ best_cost = ashl_lshr_seq[right].insn_count
+ + ashl_lshr_seq[right + left].insn_count;
+ /* mask has no trailing zeroes <==> ! right */
+ else if (! right && mask2 == ~(lsb2 - 1))
+ {
+ int late_right = exact_log2 (lsb2);
+ best_cost = ashl_lshr_seq[left + late_right].insn_count
+ + ashl_lshr_seq[late_right].insn_count;
+ }
+ /* Try to use zero extend. */
+ if (mask2 == ~(lsb2 - 1))
+ {
+ int width, first;
+
+ for (width = 8; width <= 16; width += 8)
+ {
+ /* Can we zero-extend right away? */
+ if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
+ {
+ cost = 1 + ext_ashl_lshr_seq[right].insn_count
+ + ext_ashl_lshr_seq[left + right].insn_count;
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = -1;
+ }
+ continue;
+ }
+ /* ??? Could try to put zero extend into initial right shift,
+ or even shift a bit left before the right shift. */
+ /* Determine value of first part of left shift, to get to the
+ zero extend cut-off point. */
+ first = width - exact_log2 (lsb2) + right;
+ if (first >= 0 && right + left - first >= 0)
+ {
+ cost = ext_ashl_lshr_seq[right].insn_count
+ + ext_ashl_lshr_seq[first].insn_count + 1
+ + ext_ashl_lshr_seq[right + left - first].insn_count;
+
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = first;
+ }
+ }
+ }
+ }
+ /* Try to use r0 AND pattern */
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ if (! CONST_OK_FOR_K08 (mask >> i))
+ continue;
+ cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
+ if (cost < best_cost)
+ {
+ best = 2;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1;
+ }
+ }
+ /* Try to use a scratch register to hold the AND operand. */
+ can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
+ + (can_ext
+ ? ext_ashl_lshr_seq
+ : ashl_lshr_seq)[left + i].insn_count;
+ if (cost < best_cost)
+ {
+ best = 4 - can_ext;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
+ }
+ }
+
+ if (attrp)
+ {
+ attrp[0] = best_right;
+ attrp[1] = best_len;
+ }
+ return best;
+}
+
+/* This is used in length attributes of the unnamed instructions
+ corresponding to shl_and_kind return values of 1 and 2. */
+int
+shl_and_length (rtx insn)
+{
+ rtx set_src, left_rtx, mask_rtx;
+ int attributes[3];
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ mask_rtx = XEXP (set_src, 1);
+ shl_and_kind (left_rtx, mask_rtx, attributes);
+ return attributes[1];
+}
+
+/* This is used in length attribute of the and_shl_scratch instruction. */
+int
+shl_and_scr_length (rtx insn)
+{
+ rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
+ rtx op = XEXP (set_src, 0);
+ len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
+ op = XEXP (XEXP (op, 0), 0);
+ return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
+}
+
+/* Generate rtl for instructions for which shl_and_kind advised a particular
+   method of generating them, i.e. returned a nonzero value.  */
+bool
+gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
+{
+ int attributes[3];
+ unsigned HOST_WIDE_INT mask;
+ int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
+ int right, total_shift;
+ void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
+
+ right = attributes[0];
+ total_shift = INTVAL (left_rtx) + right;
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
+ switch (kind)
+ {
+ default:
+ return true;
+ case 1:
+ {
+ int first = attributes[2];
+ rtx operands[3];
+
+ if (first < 0)
+ {
+ emit_insn ((mask << right) <= 0xff
+ ? gen_zero_extendqisi2 (dest,
+ gen_lowpart (QImode, source))
+ : gen_zero_extendhisi2 (dest,
+ gen_lowpart (HImode, source)));
+ source = dest;
+ }
+ if (source != dest)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (right)
+ {
+ operands[2] = GEN_INT (right);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ if (first > 0)
+ {
+ operands[2] = GEN_INT (first);
+ gen_shifty_hi_op (ASHIFT, operands);
+ total_shift -= first;
+ mask <<= first;
+ }
+ if (first >= 0)
+ emit_insn (mask <= 0xff
+ ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ if (total_shift > 0)
+ {
+ operands[2] = GEN_INT (total_shift);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ break;
+ }
+ case 4:
+ shift_gen_fun = gen_shifty_op;
+ case 3:
+ /* If the topmost bit that matters is set, set the topmost bits
+ that don't matter. This way, we might be able to get a shorter
+ signed constant. */
+ if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
+ mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
+ case 2:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (currently_expanding_to_rtl
+ || reload_in_progress || reload_completed)
+ {
+ rtx operands[3];
+
+ /* Cases 3 and 4 should be handled by this split
+ only while combining */
+ gcc_assert (kind <= 2);
+ if (right)
+ {
+ emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
+ source = dest;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
+ if (total_shift)
+ {
+ operands[0] = dest;
+ operands[1] = dest;
+ operands[2] = GEN_INT (total_shift);
+ shift_gen_fun (ASHIFT, operands);
+ }
+ break;
+ }
+ else
+ {
+ int neg = 0;
+ if (kind != 4 && total_shift < 16)
+ {
+ neg = -ext_ashl_lshr_seq[total_shift].amount[1];
+ if (neg > 0)
+ neg -= ext_ashl_lshr_seq[total_shift].amount[2];
+ else
+ neg = 0;
+ }
+ emit_insn (gen_and_shl_scratch (dest, source,
+ GEN_INT (right),
+ GEN_INT (mask),
+ GEN_INT (total_shift + neg),
+ GEN_INT (neg)));
+ emit_insn (gen_movsi (dest, dest));
+ break;
+ }
+ }
+ return false;
+}
+
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
+ return 0 for simple left / right shift combination.
+ return 1 for left shift / 8 bit sign extend / left shift.
+ return 2 for left shift / 16 bit sign extend / left shift.
+ return 3 for left shift / 8 bit sign extend / shift / sign extend.
+ return 4 for left shift / 16 bit sign extend / shift / sign extend.
+ return 5 for left shift / 16 bit sign extend / right shift
+ return 6 for < 8 bit sign extend / left shift.
+ return 7 for < 8 bit sign extend / left shift / single right shift.
+ If COSTP is nonzero, assign the calculated cost to *COSTP. */
+int
+shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
+{
+ int left, size, insize, ext;
+ int cost = 0, best_cost;
+ int kind;
+
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ gcc_assert (insize > 0);
+ /* Default to left / right shift. */
+ kind = 0;
+ best_cost = ashl_lshr_seq[32 - insize].insn_count
+ + ashl_lshr_seq[32 - size].insn_count;
+ if (size <= 16)
+ {
+ /* 16 bit shift / sign extend / 16 bit shift */
+ cost = ashl_lshr_seq[16 - insize].insn_count + 1
+ + ashl_lshr_seq[16 - size].insn_count;
+ /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
+ below, by alternative 3 or something even better. */
+ if (cost < best_cost)
+ {
+ kind = 5;
+ best_cost = cost;
+ }
+ }
+ /* Try a plain sign extend between two shifts. */
+ for (ext = 16; ext >= insize; ext -= 8)
+ {
+ if (ext <= size)
+ {
+ cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
+ + ashl_lshr_seq[size - ext].insn_count;
+ if (cost < best_cost)
+ {
+ kind = ext / (unsigned) 8;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can do a sloppy shift with a final signed shift
+ restoring the sign. */
+ if (EXT_SHIFT_SIGNED (size - ext))
+ cost = ext_ashl_lshr_seq[ext - insize].insn_count
+ + ext_ashl_lshr_seq[size - ext].insn_count + 1;
+ /* If not, maybe it's still cheaper to do the second shift sloppy,
+ and do a final sign extend? */
+ else if (size <= 16)
+ cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
+ + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
+ + 1;
+ else
+ continue;
+ if (cost < best_cost)
+ {
+ kind = ext / (unsigned) 8 + 2;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can sign extend in r0 */
+ if (insize < 8)
+ {
+ cost = 3 + ashl_lshr_seq[left].insn_count;
+ if (cost < best_cost)
+ {
+ kind = 6;
+ best_cost = cost;
+ }
+ /* Try the same with a final signed shift. */
+ if (left < 31)
+ {
+ cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
+ if (cost < best_cost)
+ {
+ kind = 7;
+ best_cost = cost;
+ }
+ }
+ }
+ if (TARGET_DYNSHIFT)
+ {
+ /* Try to use a dynamic shift. */
+ cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
+ if (cost < best_cost)
+ {
+ kind = 0;
+ best_cost = cost;
+ }
+ }
+ if (costp)
+ *costp = cost;
+ return kind;
+}
+
+/* Function to be used in the length attribute of the instructions
+ implementing this pattern. */
+int
+shl_sext_length (rtx insn)
+{
+ rtx set_src, left_rtx, size_rtx;
+ int cost;
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ size_rtx = XEXP (set_src, 1);
+ shl_sext_kind (left_rtx, size_rtx, &cost);
+ return cost;
+}
+
+/* Generate rtl for this pattern */
+bool
+gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
+{
+ int kind;
+ int left, size, insize, cost;
+ rtx operands[3];
+
+ kind = shl_sext_kind (left_rtx, size_rtx, &cost);
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ switch (kind)
+ {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ {
+ int ext = kind & 1 ? 8 : 16;
+ int shift2 = size - ext;
+
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! currently_expanding_to_rtl
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ if (dest != source)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (ext - insize)
+ {
+ operands[2] = GEN_INT (ext - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ emit_insn (kind & 1
+ ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ if (kind <= 2)
+ {
+ if (shift2)
+ {
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_op (ASHIFT, operands);
+ }
+ }
+ else
+ {
+ if (shift2 > 0)
+ {
+ if (EXT_SHIFT_SIGNED (shift2))
+ {
+ operands[2] = GEN_INT (shift2 + 1);
+ gen_shifty_op (ASHIFT, operands);
+ operands[2] = const1_rtx;
+ gen_shifty_op (ASHIFTRT, operands);
+ break;
+ }
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ else if (shift2)
+ {
+ operands[2] = GEN_INT (-shift2);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ emit_insn (size <= 8
+ ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ break;
+ }
+ case 5:
+ {
+ int i = 16 - size;
+ if (! currently_expanding_to_rtl
+ && ! reload_in_progress && ! reload_completed)
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ else
+ {
+ operands[0] = dest;
+ operands[2] = GEN_INT (16 - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ /* Don't use gen_ashrsi3 because it generates new pseudos. */
+ while (--i >= 0)
+ gen_ashift (ASHIFTRT, 1, dest);
+ break;
+ }
+ case 6:
+ case 7:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! currently_expanding_to_rtl
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
+ emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
+ emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
+ operands[0] = dest;
+ operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
+ gen_shifty_op (ASHIFT, operands);
+ if (kind == 7)
+ emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
+ break;
+ default:
+ return true;
+ }
+ return false;
+}
+
+/* Prefix a symbol_ref name with "datalabel". */
+rtx
+gen_datalabel_ref (rtx sym)
+{
+ const char *str;
+
+ if (GET_CODE (sym) == LABEL_REF)
+ return gen_rtx_CONST (GET_MODE (sym),
+ gen_rtx_UNSPEC (GET_MODE (sym),
+ gen_rtvec (1, sym),
+ UNSPEC_DATALABEL));
+
+ gcc_assert (GET_CODE (sym) == SYMBOL_REF);
+
+ str = XSTR (sym, 0);
+ /* Share all SYMBOL_REF strings with the same value - that is important
+ for cse. */
+ str = IDENTIFIER_POINTER (get_identifier (str));
+ XSTR (sym, 0) = str;
+
+ return sym;
+}
+
+
+static alloc_pool label_ref_list_pool;
+
+typedef struct label_ref_list_d
+{
+ rtx label;
+ struct label_ref_list_d *next;
+} *label_ref_list_t;
+
+/* The SH cannot load a large constant into a register; constants have to
+   come from a pc relative load.  The constant referenced by a pc relative
+   load instruction must be less than 1k ahead of the instruction.  This
+ means that we often have to dump a constant inside a function, and
+ generate code to branch around it.
+
+ It is important to minimize this, since the branches will slow things
+ down and make things bigger.
+
+ Worst case code looks like:
+
+ mov.l L1,rn
+ bra L2
+ nop
+ align
+ L1: .long value
+ L2:
+ ..
+
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3: .long value
+ L4:
+ ..
+
+ We fix this by performing a scan before scheduling, which notices which
+ instructions need to have their operands fetched from the constant table
+ and builds the table.
+
+ The algorithm is:
+
+ scan, find an instruction which needs a pcrel move. Look forward, find the
+ last barrier which is within MAX_COUNT bytes of the requirement.
+ If there isn't one, make one. Process all the instructions between
+ the find and the barrier.
+
+ In the above example, we can tell that L3 is within 1k of L1, so
+ the first move can be shrunk from the 3 insn+constant sequence into
+ just 1 insn, and the constant moved to L3 to make:
+
+ mov.l L1,rn
+ ..
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3:.long value
+ L4:.long value
+
+ Then the second move becomes the target for the shortening process. */
+
+typedef struct
+{
+ rtx value; /* Value in table. */
+ rtx label; /* Label of value. */
+ label_ref_list_t wend; /* End of window. */
+ enum machine_mode mode; /* Mode of value. */
+
+ /* True if this constant is accessed as part of a post-increment
+ sequence. Note that HImode constants are never accessed in this way. */
+ bool part_of_sequence_p;
+} pool_node;
+
+/* The maximum number of constants that can fit into one pool, since
+ constants in the range 0..510 are at least 2 bytes long, and in the
+ range from there to 1018 at least 4 bytes. */
+
+#define MAX_POOL_SIZE 372
+static pool_node pool_vector[MAX_POOL_SIZE];
+static int pool_size;
+static rtx pool_window_label;
+static int pool_window_last;
+
+static int max_labelno_before_reorg;
+
+/* ??? If we need a constant in HImode which is the truncated value of a
+ constant we need in SImode, we could combine the two entries thus saving
+ two bytes. Is this common enough to be worth the effort of implementing
+ it? */
+
+/* ??? This stuff should be done at the same time that we shorten branches.
+ As it is now, we must assume that all branches are the maximum size, and
+ this causes us to almost always output constant pools sooner than
+ necessary. */
+
+/* Add a constant to the pool and return its label. */
+static rtx
+add_constant (rtx x, enum machine_mode mode, rtx last_value)
+{
+ int i;
+ rtx lab, new_rtx;
+ label_ref_list_t ref, newref;
+
+ /* First see if we've already got it. */
+ for (i = 0; i < pool_size; i++)
+ {
+ if (x->code == pool_vector[i].value->code
+ && mode == pool_vector[i].mode)
+ {
+ if (x->code == CODE_LABEL)
+ {
+ if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
+ continue;
+ }
+ if (rtx_equal_p (x, pool_vector[i].value))
+ {
+ lab = new_rtx = 0;
+ if (! last_value
+ || ! i
+ || ! rtx_equal_p (last_value, pool_vector[i-1].value))
+ {
+ new_rtx = gen_label_rtx ();
+ LABEL_REFS (new_rtx) = pool_vector[i].label;
+ pool_vector[i].label = lab = new_rtx;
+ }
+ if (lab && pool_window_label)
+ {
+ newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
+ newref->label = pool_window_label;
+ ref = pool_vector[pool_window_last].wend;
+ newref->next = ref;
+ pool_vector[pool_window_last].wend = newref;
+ }
+ if (new_rtx)
+ pool_window_label = new_rtx;
+ pool_window_last = i;
+ return lab;
+ }
+ }
+ }
+
+ /* Need a new one. */
+ pool_vector[pool_size].value = x;
+ if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
+ {
+ lab = 0;
+ pool_vector[pool_size - 1].part_of_sequence_p = true;
+ }
+ else
+ lab = gen_label_rtx ();
+ pool_vector[pool_size].mode = mode;
+ pool_vector[pool_size].label = lab;
+ pool_vector[pool_size].wend = NULL;
+ pool_vector[pool_size].part_of_sequence_p = (lab == 0);
+ if (lab && pool_window_label)
+ {
+ newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
+ newref->label = pool_window_label;
+ ref = pool_vector[pool_window_last].wend;
+ newref->next = ref;
+ pool_vector[pool_window_last].wend = newref;
+ }
+ if (lab)
+ pool_window_label = lab;
+ pool_window_last = pool_size;
+ pool_size++;
+ return lab;
+}
+
+/* Output the literal table. START, if nonzero, is the first instruction
+ this table is needed for, and also indicates that there is at least one
+   casesi_worker_2 instruction; we have to emit the operand3 labels from
+ these insns at a 4-byte aligned position. BARRIER is the barrier
+ after which we are to place the table. */
+static void
+dump_table (rtx start, rtx barrier)
+{
+ rtx scan = barrier;
+ int i;
+ bool need_align = true;
+ rtx lab;
+ label_ref_list_t ref;
+ bool have_df = false;
+
+  /* Do two passes; the first time, dump out the HImode sized constants.  */
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ if (p->mode == HImode)
+ {
+ if (need_align)
+ {
+ scan = emit_insn_after (gen_align_2 (), scan);
+ need_align = false;
+ }
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
+ scan);
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ scan = emit_insn_after (gen_consttable_window_end (lab), scan);
+ }
+ }
+ else if (p->mode == DFmode)
+ have_df = true;
+ }
+
+ need_align = true;
+
+ if (start)
+ {
+ scan = emit_insn_after (gen_align_4 (), scan);
+ need_align = false;
+ for (; start != barrier; start = NEXT_INSN (start))
+ if (NONJUMP_INSN_P (start)
+ && recog_memoized (start) == CODE_FOR_casesi_worker_2)
+ {
+ rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
+ rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
+
+ scan = emit_label_after (lab, scan);
+ }
+ }
+ if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
+ {
+ rtx align_insn = NULL_RTX;
+
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
+ need_align = false;
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ switch (p->mode)
+ {
+ case HImode:
+ break;
+ case SImode:
+ case SFmode:
+ if (align_insn && !p->part_of_sequence_p)
+ {
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ emit_label_before (lab, align_insn);
+ emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
+ align_insn);
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ emit_insn_before (gen_consttable_window_end (lab),
+ align_insn);
+ }
+ delete_insn (align_insn);
+ align_insn = NULL_RTX;
+ continue;
+ }
+ else
+ {
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_4 (p->value,
+ const0_rtx), scan);
+ need_align = ! need_align;
+ }
+ break;
+ case DFmode:
+ if (need_align)
+ {
+ scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
+ align_insn = scan;
+ need_align = false;
+ }
+ case DImode:
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
+ scan);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (p->mode != HImode)
+ {
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ scan = emit_insn_after (gen_consttable_window_end (lab),
+ scan);
+ }
+ }
+ }
+
+ pool_size = 0;
+ }
+
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ switch (p->mode)
+ {
+ case HImode:
+ break;
+ case SImode:
+ case SFmode:
+ if (need_align)
+ {
+ need_align = false;
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_4 (), scan);
+ }
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
+ scan);
+ break;
+ case DFmode:
+ case DImode:
+ if (need_align)
+ {
+ need_align = false;
+ scan = emit_label_after (gen_label_rtx (), scan);
+ scan = emit_insn_after (gen_align_4 (), scan);
+ }
+ for (lab = p->label; lab; lab = LABEL_REFS (lab))
+ scan = emit_label_after (lab, scan);
+ scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
+ scan);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (p->mode != HImode)
+ {
+ for (ref = p->wend; ref; ref = ref->next)
+ {
+ lab = ref->label;
+ scan = emit_insn_after (gen_consttable_window_end (lab), scan);
+ }
+ }
+ }
+
+ scan = emit_insn_after (gen_consttable_end (), scan);
+ scan = emit_barrier_after (scan);
+ pool_size = 0;
+ pool_window_label = NULL_RTX;
+ pool_window_last = 0;
+}
+
+#define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
+
+/* Nonzero if the insn is a move instruction which needs to be fixed. */
+
+/* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
+ CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
+ need to fix it if the input value is CONST_OK_FOR_I08. */
+static bool
+broken_move (rtx insn)
+{
+ if (NONJUMP_INSN_P (insn))
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET
+ /* We can load any 8-bit value if we don't care what the high
+ order bits end up as. */
+ && GET_MODE (SET_DEST (pat)) != QImode
+ && (CONSTANT_P (SET_SRC (pat))
+ || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
+ && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
+ /* Match mova_const. */
+ || (GET_CODE (SET_SRC (pat)) == UNSPEC
+ && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
+ && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
+ && ! (TARGET_SH2E
+ && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
+ && (fp_zero_operand (SET_SRC (pat))
+ || fp_one_operand (SET_SRC (pat)))
+ /* In general we don't know the current setting of fpscr, so
+ disable fldi.
+ There is an exception if this was a register-register move
+ before reload - and hence it was ascertained that we have
+ single precision setting - and in a post-reload optimization
+ we changed this to do a constant load. In that case
+ we don't have an r0 clobber, hence we must use fldi. */
+ && (TARGET_FMOVD
+ || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
+ == SCRATCH))
+ && REG_P (SET_DEST (pat))
+ && FP_REGISTER_P (REGNO (SET_DEST (pat))))
+ && ! (TARGET_SH2A
+ && GET_MODE (SET_DEST (pat)) == SImode
+ && (satisfies_constraint_I20 (SET_SRC (pat))
+ || satisfies_constraint_I28 (SET_SRC (pat))))
+ && ! satisfies_constraint_I08 (SET_SRC (pat)))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if the specified insn is a mova insn. */
+static bool
+mova_p (rtx insn)
+{
+ return (NONJUMP_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
+ && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
+ /* Don't match mova_const. */
+ && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
+}
+
+/* Fix up a mova from a switch that went out of range. */
+static void
+fixup_mova (rtx mova)
+{
+ PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
+ if (! flag_pic)
+ {
+ SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
+ INSN_CODE (mova) = -1;
+ }
+ else
+ {
+ rtx worker = mova;
+ rtx lab = gen_label_rtx ();
+ rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
+
+ do
+ {
+ worker = NEXT_INSN (worker);
+ gcc_assert (worker
+ && !LABEL_P (worker)
+ && !JUMP_P (worker));
+ } while (NOTE_P (worker)
+ || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
+ wpat = PATTERN (worker);
+ wpat0 = XVECEXP (wpat, 0, 0);
+ wpat1 = XVECEXP (wpat, 0, 1);
+ wsrc = SET_SRC (wpat0);
+ PATTERN (worker) = (gen_casesi_worker_2
+ (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
+ XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
+ XEXP (wpat1, 0)));
+ INSN_CODE (worker) = -1;
+ target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ base = gen_rtx_LABEL_REF (Pmode, lab);
+ diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
+ SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
+ INSN_CODE (mova) = -1;
+ }
+}
+
+/* NEW_MOVA is a mova we've just encountered while scanning forward. Update
+ *num_mova, and check if the new mova is not nested within the first one.
+ return 0 if *first_mova was replaced, 1 if new_mova was replaced,
+   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
+static int
+untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
+{
+ int n_addr = 0; /* Initialization to shut up spurious warning. */
+ int f_target, n_target = 0; /* Likewise. */
+
+ if (optimize)
+ {
+ /* If NEW_MOVA has no address yet, it will be handled later. */
+ if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
+ return -1;
+
+ n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
+ n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
+ if (n_addr > n_target || n_addr + 1022 < n_target)
+ {
+ /* Change the mova into a load.
+ broken_move will then return true for it. */
+ fixup_mova (new_mova);
+ return 1;
+ }
+ }
+ if (!(*num_mova)++)
+ {
+ *first_mova = new_mova;
+ return 2;
+ }
+ if (!optimize
+ || ((f_target
+ = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
+ >= n_target))
+ return -1;
+
+ (*num_mova)--;
+ if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
+ > n_target - n_addr)
+ {
+ fixup_mova (*first_mova);
+ return 0;
+ }
+ else
+ {
+ fixup_mova (new_mova);
+ return 1;
+ }
+}
+
+/* Find the last barrier from insn FROM which is close enough to hold the
+ constant pool. If we can't find one, then create one near the end of
+ the range. */
+static rtx
+find_barrier (int num_mova, rtx mova, rtx from)
+{
+ int count_si = 0;
+ int count_hi = 0;
+ int found_hi = 0;
+ int found_si = 0;
+ int found_di = 0;
+ int hi_align = 2;
+ int si_align = 2;
+ int leading_mova = num_mova;
+ rtx barrier_before_mova = NULL_RTX;
+ rtx found_barrier = NULL_RTX;
+ rtx good_barrier = NULL_RTX;
+ int si_limit;
+ int hi_limit;
+ rtx orig = from;
+ rtx last_got = NULL_RTX;
+ rtx last_symoff = NULL_RTX;
+
+ /* For HImode: range is 510, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 for the jump instruction
+ that we may need to emit before the table, subtract 2 for the instruction
+ that fills the jump delay slot (in very rare cases, reorg will take an
+ instruction from after the constant pool or will leave the delay slot
+ empty). This gives 510.
+ For SImode: range is 1020, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 in case pc is 2 byte
+ aligned, subtract 2 for the jump instruction that we may need to emit
+ before the table, subtract 2 for the instruction that fills the jump
+ delay slot. This gives 1018. */
+
+ /* The branch will always be shortened now that the reference address for
+     forward branches is the successor address; thus we no longer need to
+     make adjustments to the [sh]i_limit for -O0.  */
+
+ si_limit = 1018;
+ hi_limit = 510;
+
+ while (from && count_si < si_limit && count_hi < hi_limit)
+ {
+ int inc = get_attr_length (from);
+ int new_align = 1;
+
+ /* If this is a label that existed at the time of the compute_alignments
+ call, determine the alignment. N.B. When find_barrier recurses for
+ an out-of-reach mova, we might see labels at the start of previously
+ inserted constant tables. */
+ if (LABEL_P (from)
+ && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
+ {
+ if (optimize)
+ new_align = 1 << label_to_alignment (from);
+ else if (BARRIER_P (prev_nonnote_insn (from)))
+ new_align = 1 << barrier_align (from);
+ else
+ new_align = 1;
+ inc = 0;
+ }
+ /* In case we are scanning a constant table because of recursion, check
+ for explicit alignments. If the table is long, we might be forced
+ to emit the new table in front of it; the length of the alignment
+ might be the last straw. */
+ else if (NONJUMP_INSN_P (from)
+ && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
+ new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
+ /* When we find the end of a constant table, paste the new constant
+ at the end. That is better than putting it in front because
+ this way, we don't need extra alignment for adding a 4-byte-aligned
+ mov(a) label to a 2/4 or 8/4 byte aligned table. */
+ else if (NONJUMP_INSN_P (from)
+ && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
+ && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
+ return from;
+
+ if (BARRIER_P (from))
+ {
+ rtx next;
+
+ found_barrier = from;
+
+ /* If we are at the end of the function, or in front of an alignment
+ instruction, we need not insert an extra alignment. We prefer
+ this kind of barrier. */
+ if (barrier_align (from) > 2)
+ good_barrier = from;
+
+ /* If we are at the end of a hot/cold block, dump the constants
+ here. */
+ next = NEXT_INSN (from);
+ if (next
+ && NOTE_P (next)
+ && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
+ break;
+ }
+
+ if (broken_move (from))
+ {
+ rtx pat, src, dst;
+ enum machine_mode mode;
+
+ pat = PATTERN (from);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
+
+	  /* GOT pc-relative setting comes in a pair of
+ mova .L8,r0
+ mov.l .L8,r12
+ instructions. (plus add r0,r12).
+ Remember if we see one without the other. */
+ if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
+ last_got = last_got ? NULL_RTX : from;
+ else if (PIC_ADDR_P (src))
+ last_got = last_got ? NULL_RTX : from;
+
+ /* We must explicitly check the mode, because sometimes the
+ front end will generate code to load unsigned constants into
+ HImode targets without properly sign extending them. */
+ if (mode == HImode
+ || (mode == SImode && satisfies_constraint_I16 (src)
+ && REGNO (dst) != FPUL_REG))
+ {
+ found_hi += 2;
+ /* We put the short constants before the long constants, so
+ we must count the length of short constants in the range
+ for the long constants. */
+ /* ??? This isn't optimal, but is easy to do. */
+ si_limit -= 2;
+ }
+ else
+ {
+ /* We dump DF/DI constants before SF/SI ones, because
+ the limit is the same, but the alignment requirements
+ are higher. We may waste up to 4 additional bytes
+ for alignment, and the DF/DI constant may have
+ another SF/SI constant placed before it. */
+ if (TARGET_SHCOMPACT
+ && ! found_di
+ && (mode == DFmode || mode == DImode))
+ {
+ found_di = 1;
+ si_limit -= 8;
+ }
+ while (si_align > 2 && found_si + si_align - 2 > count_si)
+ si_align >>= 1;
+ if (found_si > count_si)
+ count_si = found_si;
+ found_si += GET_MODE_SIZE (mode);
+ if (num_mova)
+ si_limit -= GET_MODE_SIZE (mode);
+ }
+ }
+
+ if (mova_p (from))
+ {
+ switch (untangle_mova (&num_mova, &mova, from))
+ {
+ case 1:
+ if (flag_pic)
+ {
+ rtx src = SET_SRC (PATTERN (from));
+ if (GET_CODE (src) == CONST
+ && GET_CODE (XEXP (src, 0)) == UNSPEC
+ && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
+ last_symoff = from;
+ }
+ break;
+ case 0: return find_barrier (0, 0, mova);
+ case 2:
+ {
+ leading_mova = 0;
+ barrier_before_mova
+ = good_barrier ? good_barrier : found_barrier;
+ }
+ default: break;
+ }
+ if (found_si > count_si)
+ count_si = found_si;
+ }
+ else if (JUMP_TABLE_DATA_P (from)
+ && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
+ {
+ if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
+ || (num_mova
+ && (prev_nonnote_insn (from)
+ == XEXP (MOVA_LABELREF (mova), 0))))
+ num_mova--;
+ if (barrier_align (next_real_insn (from)) == align_jumps_log)
+ {
+ /* We have just passed the barrier in front of the
+ ADDR_DIFF_VEC, which is stored in found_barrier. Since
+ the ADDR_DIFF_VEC is accessed as data, just like our pool
+ constants, this is a good opportunity to accommodate what
+ we have gathered so far.
+ If we waited any longer, we could end up at a barrier in
+ front of code, which gives worse cache usage for separated
+ instruction / data caches. */
+ good_barrier = found_barrier;
+ break;
+ }
+ else
+ {
+ rtx body = PATTERN (from);
+ inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
+ }
+ }
+ /* For the SH1, we generate alignments even after jumps-around-jumps. */
+ else if (JUMP_P (from)
+ && ! TARGET_SH2
+ && ! optimize_size)
+ new_align = 4;
+
+ /* There is a possibility that a bf is transformed into a bf/s by the
+ delay slot scheduler. */
+ if (JUMP_P (from)
+ && get_attr_type (from) == TYPE_CBRANCH
+ && ! sequence_insn_p (from))
+ inc += 2;
+
+ if (found_si)
+ {
+ count_si += inc;
+ if (new_align > si_align)
+ {
+ si_limit -= (count_si - 1) & (new_align - si_align);
+ si_align = new_align;
+ }
+ count_si = (count_si + new_align - 1) & -new_align;
+ }
+ if (found_hi)
+ {
+ count_hi += inc;
+ if (new_align > hi_align)
+ {
+ hi_limit -= (count_hi - 1) & (new_align - hi_align);
+ hi_align = new_align;
+ }
+ count_hi = (count_hi + new_align - 1) & -new_align;
+ }
+ from = NEXT_INSN (from);
+ }
+
+ if (num_mova)
+ {
+ if (leading_mova)
+ {
+ /* Try as we might, the leading mova is out of range. Change
+ it into a load (which will become a pcload) and retry. */
+ fixup_mova (mova);
+ return find_barrier (0, 0, mova);
+ }
+ else
+ {
+ /* Insert the constant pool table before the mova instruction,
+ to prevent the mova label reference from going out of range. */
+ from = mova;
+ good_barrier = found_barrier = barrier_before_mova;
+ }
+ }
+
+ if (found_barrier)
+ {
+ if (good_barrier && next_real_insn (found_barrier))
+ found_barrier = good_barrier;
+ }
+ else
+ {
+ /* We didn't find a barrier in time to dump our stuff,
+ so we'll make one. */
+ rtx label = gen_label_rtx ();
+
+ /* Don't emit a constant table in the middle of insns for
+ casesi_worker_2. This is a bit overkill but is enough
+ because casesi_worker_2 wouldn't appear so frequently. */
+ if (last_symoff)
+ from = last_symoff;
+
+ /* If we exceeded the range, then we must back up over the last
+ instruction we looked at. Otherwise, we just need to undo the
+ NEXT_INSN at the end of the loop. */
+ if (PREV_INSN (from) != orig
+ && (count_hi > hi_limit || count_si > si_limit))
+ from = PREV_INSN (PREV_INSN (from));
+ else
+ from = PREV_INSN (from);
+
+      /* Don't emit a constant table in the middle of global pointer setting,
+	 since that would move the addressing base GOT into another table.
+ We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
+ in the pool anyway, so just move up the whole constant pool.
+
+ However, avoid doing so when the last single GOT mov is the starting
+	 insn itself.  Going back past the start insn would create a negative
+ offset, causing errors. */
+ if (last_got && last_got != orig)
+ from = PREV_INSN (last_got);
+
+ /* Don't insert the constant pool table at the position which
+ may be the landing pad. */
+ if (flag_exceptions
+ && CALL_P (from)
+ && find_reg_note (from, REG_EH_REGION, NULL_RTX))
+ from = PREV_INSN (from);
+
+ /* Walk back to be just before any jump or label.
+ Putting it before a label reduces the number of times the branch
+ around the constant pool table will be hit. Putting it before
+ a jump makes it more likely that the bra delay slot will be
+ filled. */
+ while (NOTE_P (from) || JUMP_P (from)
+ || LABEL_P (from))
+ from = PREV_INSN (from);
+
+ /* Make sure we do not split between a call and its corresponding
+ CALL_ARG_LOCATION note. */
+ if (CALL_P (from))
+ {
+ rtx next = NEXT_INSN (from);
+ if (next && NOTE_P (next)
+ && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
+ from = next;
+ }
+
+ from = emit_jump_insn_after (gen_jump (label), from);
+ JUMP_LABEL (from) = label;
+ LABEL_NUSES (label) = 1;
+ found_barrier = emit_barrier_after (from);
+ emit_label_after (label, found_barrier);
+ }
+
+ return found_barrier;
+}
+
+/* If the instruction INSN is implemented by a special function, and we can
+ positively find the register that is used to call the sfunc, and this
+ register is not used anywhere else in this instruction - except as the
+ destination of a set, return this register; else, return 0. */
+rtx
+sfunc_uses_reg (rtx insn)
+{
+ int i;
+ rtx pattern, part, reg_part, reg;
+
+ if (!NONJUMP_INSN_P (insn))
+ return NULL_RTX;
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
+ return NULL_RTX;
+
+ for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
+ reg_part = part;
+ }
+ if (! reg_part)
+ return NULL_RTX;
+ reg = XEXP (reg_part, 0);
+ for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (part == reg_part || GET_CODE (part) == CLOBBER)
+ continue;
+ if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
+ && REG_P (SET_DEST (part)))
+ ? SET_SRC (part) : part)))
+ return NULL_RTX;
+ }
+ return reg;
+}
+
+/* See if the only way in which INSN uses REG is by calling it, or by
+ setting it while calling it. Set *SET to a SET rtx if the register
+ is set by INSN. */
+static bool
+noncall_uses_reg (rtx reg, rtx insn, rtx *set)
+{
+ rtx pattern, reg2;
+
+ *set = NULL_RTX;
+
+ reg2 = sfunc_uses_reg (insn);
+ if (reg2 && REGNO (reg2) == REGNO (reg))
+ {
+ pattern = single_set (insn);
+ if (pattern
+ && REG_P (SET_DEST (pattern))
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ *set = pattern;
+ return false;
+ }
+ if (!CALL_P (insn))
+ {
+ /* We don't use rtx_equal_p because we don't care if the mode is
+ different. */
+ pattern = single_set (insn);
+ if (pattern
+ && REG_P (SET_DEST (pattern))
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ {
+ rtx par, part;
+ int i;
+
+ *set = pattern;
+ par = PATTERN (insn);
+ if (GET_CODE (par) == PARALLEL)
+ for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (par, 0, i);
+ if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
+ return true;
+ }
+ return reg_mentioned_p (reg, SET_SRC (pattern));
+ }
+
+ return true;
+ }
+
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ {
+ int i;
+
+ for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
+ return true;
+ pattern = XVECEXP (pattern, 0, 0);
+ }
+
+ if (GET_CODE (pattern) == SET)
+ {
+ if (reg_mentioned_p (reg, SET_DEST (pattern)))
+ {
+ /* We don't use rtx_equal_p, because we don't care if the
+ mode is different. */
+ if (!REG_P (SET_DEST (pattern))
+ || REGNO (reg) != REGNO (SET_DEST (pattern)))
+ return true;
+
+ *set = pattern;
+ }
+
+ pattern = SET_SRC (pattern);
+ }
+
+ if (GET_CODE (pattern) != CALL
+ || !MEM_P (XEXP (pattern, 0))
+ || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
+ return true;
+
+ return false;
+}
+
+/* Given X, a pattern of an insn or a part of it, return a mask of used
+ general registers. Bits 0..15 mean that the respective registers
+ are used as inputs in the instruction. Bits 16..31 mean that the
+ registers 0..15, respectively, are used as outputs, or are clobbered.
+ IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
+int
+regs_used (rtx x, int is_dest)
+{
+ enum rtx_code code;
+ const char *fmt;
+ int i, used = 0;
+
+ if (! x)
+ return used;
+ code = GET_CODE (x);
+ switch (code)
+ {
+ case REG:
+ if (REGNO (x) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (x) + is_dest));
+ return 0;
+ case SUBREG:
+ {
+ rtx y = SUBREG_REG (x);
+
+ if (!REG_P (y))
+ break;
+ if (REGNO (y) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (y) +
+ subreg_regno_offset (REGNO (y),
+ GET_MODE (y),
+ SUBREG_BYTE (x),
+ GET_MODE (x)) + is_dest));
+ return 0;
+ }
+ case SET:
+ return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
+ case RETURN:
+ /* If there was a return value, it must have been indicated with USE. */
+ return 0x00ffff00;
+ case CLOBBER:
+ is_dest = 1;
+ break;
+ case MEM:
+ is_dest = 0;
+ break;
+ case CALL:
+ used |= 0x00ff00f0;
+ break;
+ default:
+ break;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ used |= regs_used (XVECEXP (x, i, j), is_dest);
+ }
+ else if (fmt[i] == 'e')
+ used |= regs_used (XEXP (x, i), is_dest);
+ }
+ return used;
+}
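+
+/* For example, for a SImode SET of r1 from r2 this returns 0x00020004:
+   bit 2 marks r2 as an input and bit 17 (16 + 1) marks r1 as an output.  */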
+
+/* Create an instruction that prevents redirection of a conditional branch
+ to the destination of the JUMP with address ADDR.
+ If the branch needs to be implemented as an indirect jump, try to find
+ a scratch register for it.
+ If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
+ If any preceding insn that doesn't fit into a delay slot is good enough,
+ pass 1. Pass 2 if a definite blocking insn is needed.
+ -1 is used internally to avoid deep recursion.
+ If a blocking instruction is made or recognized, return it. */
+static rtx
+gen_block_redirect (rtx jump, int addr, int need_block)
+{
+ int dead = 0;
+ rtx prev = prev_nonnote_insn (jump);
+ rtx dest;
+
+ /* First, check if we already have an instruction that satisfies our need. */
+ if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
+ {
+ if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ return prev;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER
+ || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ prev = jump;
+ else if ((need_block &= ~1) < 0)
+ return prev;
+ else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
+ need_block = 0;
+ }
+ if (GET_CODE (PATTERN (jump)) == RETURN)
+ {
+ if (! need_block)
+ return prev;
+ /* Reorg even does nasty things with return insns that cause branches
+ to go out of range - see find_end_label and callers. */
+ return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
+ }
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ dest = XEXP (SET_SRC (PATTERN (jump)), 0);
+ /* If the branch is out of range, try to find a scratch register for it. */
+ if (optimize
+ && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
+ > 4092 + 4098))
+ {
+ rtx scan;
+ /* Don't look for the stack pointer as a scratch register,
+ it would cause trouble if an interrupt occurred. */
+ unsigned attempt = 0x7fff, used;
+ int jump_left = flag_expensive_optimizations + 1;
+
+ /* It is likely that the most recent eligible instruction is wanted for
+ the delay slot. Therefore, find out which registers it uses, and
+ try to avoid using them. */
+
+ for (scan = jump; (scan = PREV_INSN (scan)); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (code == CODE_LABEL || code == JUMP_INSN)
+ break;
+ if (code == INSN
+ && GET_CODE (PATTERN (scan)) != USE
+ && GET_CODE (PATTERN (scan)) != CLOBBER
+ && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
+ {
+ attempt &= ~regs_used (PATTERN (scan), 0);
+ break;
+ }
+ }
+ for (used = dead = 0, scan = JUMP_LABEL (jump);
+ (scan = NEXT_INSN (scan)); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (INSN_P (scan))
+ {
+ used |= regs_used (PATTERN (scan), 0);
+ if (code == CALL_INSN)
+ used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
+ dead |= (used >> 16) & ~used;
+ if (dead & attempt)
+ {
+ dead &= attempt;
+ break;
+ }
+ if (code == JUMP_INSN)
+ {
+ if (jump_left-- && simplejump_p (scan))
+ scan = JUMP_LABEL (scan);
+ else
+ break;
+ }
+ }
+ }
+ /* Mask out the stack pointer again, in case it was
+ the only 'free' register we have found. */
+ dead &= 0x7fff;
+ }
+ /* If the immediate destination is still in range, check for possible
+ threading with a jump beyond the delay slot insn.
+ Don't check if we are called recursively; the jump has been or will be
+ checked in a different invocation then. */
+
+ else if (optimize && need_block >= 0)
+ {
+ rtx next = next_active_insn (next_active_insn (dest));
+ if (next && JUMP_P (next)
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump_compact)
+ {
+ dest = JUMP_LABEL (next);
+ if (dest
+ && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
+ > 4092 + 4098))
+ gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
+ }
+ }
+
+ if (dead)
+ {
+ rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
+
+ /* It would be nice if we could convert the jump into an indirect
+ jump / far branch right now, and thus exposing all constituent
+ instructions to further optimization. However, reorg uses
+ simplejump_p to determine if there is an unconditional jump where
+ it should try to schedule instructions from the target of the
+ branch; simplejump_p fails for indirect jumps even if they have
+ a JUMP_LABEL. */
+ rtx insn = emit_insn_before (gen_indirect_jump_scratch
+ (reg, GEN_INT (unspec_bbr_uid++)),
+ jump);
+ /* ??? We would like this to have the scope of the jump, but that
+ scope will change when a delay slot insn of an inner scope is added.
+ Hence, after delay slot scheduling, we'll have to expect
+ NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
+ the jump. */
+
+ INSN_LOCATION (insn) = INSN_LOCATION (jump);
+ INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
+ return insn;
+ }
+ else if (need_block)
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ return emit_insn_before (gen_block_branch_redirect
+ (GEN_INT (unspec_bbr_uid++)),
+ jump);
+ return prev;
+}
+
+#define CONDJUMP_MIN -252
+#define CONDJUMP_MAX 262
+struct far_branch
+{
+ /* A label (to be placed) in front of the jump
+ that jumps to our ultimate destination. */
+ rtx near_label;
+ /* Where we are going to insert it if we cannot move the jump any farther,
+ or the jump itself if we have picked up an existing jump. */
+ rtx insert_place;
+ /* The ultimate destination. */
+ rtx far_label;
+ struct far_branch *prev;
+ /* If the branch has already been created, its address;
+ else the address of its first prospective user. */
+ int address;
+};
+
+static void gen_far_branch (struct far_branch *);
+enum mdep_reorg_phase_e mdep_reorg_phase;
+static void
+gen_far_branch (struct far_branch *bp)
+{
+ rtx insn = bp->insert_place;
+ rtx jump;
+ rtx label = gen_label_rtx ();
+ int ok;
+
+ emit_label_after (label, insn);
+ if (bp->far_label)
+ {
+ jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ jump = emit_jump_insn_after (gen_return (), insn);
+
+ /* Emit a barrier so that reorg knows that any following instructions
+ are not reachable via a fall-through path.
+ But don't do this when not optimizing, since we wouldn't suppress the
+ alignment for the barrier then, and could end up with out-of-range
+ pc-relative loads. */
+ if (optimize)
+ emit_barrier_after (jump);
+ emit_label_after (bp->near_label, insn);
+
+ if (bp->far_label)
+ JUMP_LABEL (jump) = bp->far_label;
+ else
+ {
+ rtx pat = PATTERN (jump);
+ gcc_assert (ANY_RETURN_P (pat));
+ JUMP_LABEL (jump) = pat;
+ }
+
+ ok = invert_jump (insn, label, 1);
+ gcc_assert (ok);
+
+ /* If we are branching around a jump (rather than a return), prevent
+ reorg from using an insn from the jump target as the delay slot insn -
+     when reorg did this, it pessimized code (we'd rather hide the delay slot)
+ and it could cause branches to go out of range. */
+ if (bp->far_label)
+ (emit_insn_after
+ (gen_stuff_delay_slot
+ (GEN_INT (unspec_bbr_uid++),
+ GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
+ insn));
+ /* Prevent reorg from undoing our splits. */
+ gen_block_redirect (jump, bp->address += 2, 2);
+}
+
+/* Fix up ADDR_DIFF_VECs. */
+void
+fixup_addr_diff_vecs (rtx first)
+{
+ rtx insn;
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx vec_lab, pat, prev, prevpat, x, braf_label;
+
+ if (! JUMP_TABLE_DATA_P (insn)
+ || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ continue;
+ pat = PATTERN (insn);
+ vec_lab = XEXP (XEXP (pat, 0), 0);
+
+ /* Search the matching casesi_jump_2. */
+ for (prev = vec_lab; ; prev = PREV_INSN (prev))
+ {
+ if (!JUMP_P (prev))
+ continue;
+ prevpat = PATTERN (prev);
+ if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
+ continue;
+ x = XVECEXP (prevpat, 0, 1);
+ if (GET_CODE (x) != USE)
+ continue;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
+ break;
+ }
+ /* FIXME: This is a bug in the optimizer, but it seems harmless
+	 to just avoid panicking.  */
+ if (!prev)
+ continue;
+
+ /* Emit the reference label of the braf where it belongs, right after
+ the casesi_jump_2 (i.e. braf). */
+ braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
+ emit_label_after (braf_label, prev);
+
+      /* Fix up the ADDR_DIFF_VEC to be relative
+ to the reference address of the braf. */
+ XEXP (XEXP (pat, 0), 0) = braf_label;
+ }
+}
+
+/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
+ a barrier. Return the base 2 logarithm of the desired alignment. */
+int
+barrier_align (rtx barrier_or_label)
+{
+ rtx next, pat;
+
+ if (! barrier_or_label)
+ return 0;
+
+ if (LABEL_P (barrier_or_label)
+ && NEXT_INSN (barrier_or_label)
+ && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
+ return 2;
+
+ if (BARRIER_P (barrier_or_label)
+ && PREV_INSN (barrier_or_label)
+ && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
+ {
+ pat = PATTERN (PREV_INSN (barrier_or_label));
+ /* If this is a very small table, we want to keep the alignment after
+ the table to the minimum for proper code alignment. */
+ return ((optimize_size
+ || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
+ <= (unsigned) 1 << (CACHE_LOG - 2)))
+ ? 1 << TARGET_SHMEDIA : align_jumps_log);
+ }
+
+ next = next_active_insn (barrier_or_label);
+
+ if (! next)
+ return 0;
+
+ pat = PATTERN (next);
+
+ if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
+ /* This is a barrier in front of a constant table. */
+ return 0;
+
+ if (optimize_size)
+ return 0;
+
+ if (! TARGET_SH2 || ! optimize)
+ return align_jumps_log;
+
+ /* When fixing up pcloads, a constant table might be inserted just before
+ the basic block that ends with the barrier. Thus, we can't trust the
+ instruction lengths before that. */
+ if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
+ {
+ /* Check if there is an immediately preceding branch to the insn beyond
+ the barrier. We must weigh the cost of discarding useful information
+ from the current cache line when executing this branch and there is
+ an alignment, against that of fetching unneeded insns in front of the
+ branch target when there is no alignment. */
+
+ /* There are two delay_slot cases to consider. One is the simple case
+ where the preceding branch is to the insn beyond the barrier (simple
+ delay slot filling), and the other is where the preceding branch has
+ a delay slot that is a duplicate of the insn after the barrier
+ (fill_eager_delay_slots) and the branch is to the insn after the insn
+ after the barrier. */
+
+ int slot, credit;
+ bool jump_to_next = false;
+
+ /* Skip to the insn before the JUMP_INSN before the barrier under
+ investigation. */
+ rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
+
+ for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
+ credit >= 0 && prev && NONJUMP_INSN_P (prev);
+ prev = prev_real_insn (prev))
+ {
+ jump_to_next = false;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER)
+ continue;
+ if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+ {
+ prev = XVECEXP (PATTERN (prev), 0, 1);
+ if (INSN_UID (prev) == INSN_UID (next))
+ {
+ /* Delay slot was filled with insn at jump target. */
+ jump_to_next = true;
+ continue;
+ }
+ }
+
+ if (slot &&
+ get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ slot = 0;
+ credit -= get_attr_length (prev);
+ }
+ if (prev && jump_to_label_p (prev))
+ {
+ rtx x;
+ if (jump_to_next
+ || next_real_insn (JUMP_LABEL (prev)) == next
+ /* If relax_delay_slots() decides NEXT was redundant
+ with some previous instruction, it will have
+ redirected PREV's jump to the following insn. */
+ || JUMP_LABEL (prev) == next_nonnote_insn (next)
+ /* There is no upper bound on redundant instructions
+ that might have been skipped, but we must not put an
+ alignment where none had been before. */
+ || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
+ (INSN_P (x)
+ && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
+ || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
+ || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
+ {
+ rtx pat = PATTERN (prev);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
+ return 0;
+ }
+ }
+ }
+
+ return align_jumps_log;
+}
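+
+/* A small worked example of the convention above (informal): the value
+   returned is the base 2 log of the alignment in bytes, so the "return 2"
+   for a label that is followed by jump table data requests 4-byte
+   alignment, while "return 0" requests none.  The small-table check keeps
+   the alignment that follows such a table at the minimum needed for code
+   whenever the table, i.e. XVECLEN (pat, 1) entries of
+   GET_MODE_SIZE (GET_MODE (pat)) bytes each, fits in a quarter of a cache
+   line (at most 1 << (CACHE_LOG - 2) bytes).  */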
+
+/* If we are inside a phony loop, almost any kind of label can turn up as the
+ first one in the loop. Aligning a braf label causes incorrect switch
+ destination addresses; we can detect braf labels because they are
+ followed by a BARRIER.
+ Applying loop alignment to small constant or switch tables is a waste
+ of space, so we suppress this too. */
+int
+sh_loop_align (rtx label)
+{
+ rtx next = label;
+
+ if (! optimize || optimize_size)
+ return 0;
+
+ do
+ next = next_nonnote_insn (next);
+ while (next && LABEL_P (next));
+
+ if (! next
+ || ! INSN_P (next)
+ || recog_memoized (next) == CODE_FOR_consttable_2)
+ return 0;
+
+ return align_loops_log;
+}
+
+/* Do a final pass over the function, just before delayed branch
+ scheduling. */
+static void
+sh_reorg (void)
+{
+ rtx first, insn, mova = NULL_RTX;
+ int num_mova;
+ rtx r0_rtx = gen_rtx_REG (Pmode, 0);
+ rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
+
+ first = get_insns ();
+ max_labelno_before_reorg = max_label_num ();
+
+ /* We must split call insns before introducing `mova's. If we're
+ optimizing, they'll have already been split. Otherwise, make
+ sure we don't split them too late. */
+ if (! optimize)
+ split_all_insns_noflow ();
+
+ if (TARGET_SHMEDIA)
+ return;
+
+ /* If relaxing, generate pseudo-ops to associate function calls with
+ the symbols they call. It does no harm to not generate these
+ pseudo-ops. However, when we can generate them, it enables the
+ linker to potentially relax the jsr to a bsr, and eliminate the
+ register load and, possibly, the constant pool entry. */
+
+ mdep_reorg_phase = SH_INSERT_USES_LABELS;
+ if (TARGET_RELAX)
+ {
+ /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
+ own purposes. This works because none of the remaining passes
+ need to look at them.
+
+ ??? But it may break in the future. We should use a machine
+ dependent REG_NOTE, or some other approach entirely. */
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ {
+ rtx note;
+
+ while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
+ NULL_RTX)) != 0)
+ remove_note (insn, note);
+ }
+ }
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx pattern, reg, link, set, scan, dies, label;
+ int rescan = 0, foundinsn = 0;
+
+ if (CALL_P (insn))
+ {
+ pattern = PATTERN (insn);
+
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (pattern) == SET)
+ pattern = SET_SRC (pattern);
+
+ if (GET_CODE (pattern) != CALL
+ || !MEM_P (XEXP (pattern, 0)))
+ continue;
+
+ reg = XEXP (XEXP (pattern, 0), 0);
+ }
+ else
+ {
+ reg = sfunc_uses_reg (insn);
+ if (! reg)
+ continue;
+ }
+
+ if (!REG_P (reg))
+ continue;
+
+ /* Try scanning backward to find where the register is set. */
+ link = NULL;
+ for (scan = PREV_INSN (insn);
+ scan && !LABEL_P (scan);
+ scan = PREV_INSN (scan))
+ {
+ if (! INSN_P (scan))
+ continue;
+
+ if (! reg_mentioned_p (reg, scan))
+ continue;
+
+ if (noncall_uses_reg (reg, scan, &set))
+ break;
+
+ if (set)
+ {
+ link = scan;
+ break;
+ }
+ }
+
+ if (! link)
+ continue;
+
+ /* The register is set at LINK. */
+
+ /* We can only optimize the function call if the register is
+ being set to a symbol. In theory, we could sometimes
+ optimize calls to a constant location, but the assembler
+ and linker do not support that at present. */
+ if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
+ && GET_CODE (SET_SRC (set)) != LABEL_REF)
+ continue;
+
+ /* Scan forward from LINK to the place where REG dies, and
+ make sure that the only insns which use REG are
+ themselves function calls. */
+
+ /* ??? This doesn't work for call targets that were allocated
+ by reload, since there may not be a REG_DEAD note for the
+ register. */
+
+ dies = NULL_RTX;
+ for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
+ {
+ rtx scanset;
+
+ /* Don't try to trace forward past a CODE_LABEL if we haven't
+ seen INSN yet. Ordinarily, we will only find the setting insn
+ if it is in the same basic block. However,
+ cross-jumping can insert code labels in between the load and
+ the call, and can result in situations where a single call
+ insn may have two targets depending on where we came from. */
+
+ if (LABEL_P (scan) && ! foundinsn)
+ break;
+
+ if (! INSN_P (scan))
+ continue;
+
+ /* Don't try to trace forward past a JUMP. To optimize
+ safely, we would have to check that all the
+ instructions at the jump destination did not use REG. */
+
+ if (JUMP_P (scan))
+ break;
+
+ if (! reg_mentioned_p (reg, scan))
+ continue;
+
+ if (noncall_uses_reg (reg, scan, &scanset))
+ break;
+
+ if (scan == insn)
+ foundinsn = 1;
+
+ if (scan != insn
+ && (CALL_P (scan) || sfunc_uses_reg (scan)))
+ {
+ /* There is a function call to this register other
+ than the one we are checking. If we optimize
+ this call, we need to rescan again below. */
+ rescan = 1;
+ }
+
+ /* ??? We shouldn't have to worry about SCANSET here.
+ We should just be able to check for a REG_DEAD note
+ on a function call. However, the REG_DEAD notes are
+ apparently not dependable around libcalls; c-torture
+ execute/920501-2 is a test case. If SCANSET is set,
+ then this insn sets the register, so it must have
+ died earlier. Unfortunately, this will only handle
+ the cases in which the register is, in fact, set in a
+ later insn. */
+
+ /* ??? We shouldn't have to use FOUNDINSN here.
+ This dates back to when we used LOG_LINKS to find
+ the most recent insn which sets the register. */
+
+ if (foundinsn
+ && (scanset
+ || find_reg_note (scan, REG_DEAD, reg)))
+ {
+ dies = scan;
+ break;
+ }
+ }
+
+ if (! dies)
+ {
+ /* Either there was a branch, or some insn used REG
+ other than as a function call address. */
+ continue;
+ }
+
+ /* Create a code label, and put it in a REG_LABEL_OPERAND note
+ on the insn which sets the register, and on each call insn
+ which uses the register. In final_prescan_insn we look for
+ the REG_LABEL_OPERAND notes, and output the appropriate label
+ or pseudo-op. */
+
+ label = gen_label_rtx ();
+ add_reg_note (link, REG_LABEL_OPERAND, label);
+ add_reg_note (insn, REG_LABEL_OPERAND, label);
+ if (rescan)
+ {
+ scan = link;
+ do
+ {
+ rtx reg2;
+
+ scan = NEXT_INSN (scan);
+ if (scan != insn
+ && ((CALL_P (scan)
+ && reg_mentioned_p (reg, scan))
+ || ((reg2 = sfunc_uses_reg (scan))
+ && REGNO (reg2) == REGNO (reg))))
+ add_reg_note (scan, REG_LABEL_OPERAND, label);
+ }
+ while (scan != dies);
+ }
+ }
+ }
+
+ if (TARGET_SH2)
+ fixup_addr_diff_vecs (first);
+
+ if (optimize)
+ {
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
+ shorten_branches (first);
+ }
+
+ /* Scan the function looking for move instructions which have to be
+ changed to pc-relative loads and insert the literal tables. */
+ label_ref_list_pool = create_alloc_pool ("label references list",
+ sizeof (struct label_ref_list_d),
+ 30);
+ mdep_reorg_phase = SH_FIXUP_PCLOAD;
+ for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
+ {
+ if (mova_p (insn))
+ {
+ /* ??? basic block reordering can move a switch table dispatch
+ below the switch table. Check if that has happened.
+ We only have the addresses available when optimizing; but then,
+ this check shouldn't be needed when not optimizing. */
+ if (!untangle_mova (&num_mova, &mova, insn))
+ {
+ insn = mova;
+ num_mova = 0;
+ }
+ }
+ else if (JUMP_TABLE_DATA_P (insn)
+ && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ && num_mova
+ /* ??? loop invariant motion can also move a mova out of a
+ loop. Since loop does this code motion anyway, maybe we
+ should wrap UNSPEC_MOVA into a CONST, so that reload can
+ move it back. */
+ && ((num_mova > 1
+ && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
+ || (prev_nonnote_insn (insn)
+ == XEXP (MOVA_LABELREF (mova), 0))))
+ {
+ rtx scan;
+ int total;
+
+ num_mova--;
+
+ /* Some code might have been inserted between the mova and
+ its ADDR_DIFF_VEC. Check if the mova is still in range. */
+ for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
+ total += get_attr_length (scan);
+
+ /* The range of mova is 1020; add 4 because the pc counts from the
+ address of the second instruction after this one, and subtract 2 in
+ case the pc is 2-byte aligned. Possible alignment needed for the
+ ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
+ if (total > 1022)
+ {
+ /* Change the mova into a load, and restart scanning
+ there. broken_move will then return true for mova. */
+ fixup_mova (mova);
+ insn = mova;
+ }
+ }
+ if (broken_move (insn)
+ || (NONJUMP_INSN_P (insn)
+ && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
+ {
+ rtx scan;
+ /* Scan ahead looking for a barrier to stick the constant table
+ behind. */
+ rtx barrier = find_barrier (num_mova, mova, insn);
+ rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
+ int need_aligned_label = 0;
+
+ if (num_mova && ! mova_p (mova))
+ {
+ /* find_barrier had to change the first mova into a
+ pcload; thus, we have to start with this new pcload. */
+ insn = mova;
+ num_mova = 0;
+ }
+ /* Now find all the moves between the points and modify them. */
+ for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
+ {
+ if (LABEL_P (scan))
+ last_float = 0;
+ if (NONJUMP_INSN_P (scan)
+ && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
+ need_aligned_label = 1;
+ if (broken_move (scan))
+ {
+ rtx *patp = &PATTERN (scan), pat = *patp;
+ rtx src, dst;
+ rtx lab;
+ rtx newsrc;
+ enum machine_mode mode;
+
+ if (GET_CODE (pat) == PARALLEL)
+ patp = &XVECEXP (pat, 0, 0), pat = *patp;
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
+
+ if (mode == SImode && satisfies_constraint_I16 (src)
+ && REGNO (dst) != FPUL_REG)
+ {
+ int offset = 0;
+
+ mode = HImode;
+ while (GET_CODE (dst) == SUBREG)
+ {
+ offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
+ GET_MODE (SUBREG_REG (dst)),
+ SUBREG_BYTE (dst),
+ GET_MODE (dst));
+ dst = SUBREG_REG (dst);
+ }
+ dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
+ }
+ if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
+ {
+ /* This must be an insn that clobbers r0. */
+ rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
+ XVECLEN (PATTERN (scan), 0)
+ - 1);
+ rtx clobber = *clobberp;
+
+ gcc_assert (GET_CODE (clobber) == CLOBBER
+ && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
+
+ if (last_float
+ && reg_set_between_p (r0_rtx, last_float_move, scan))
+ last_float = 0;
+ if (last_float
+ && TARGET_SHCOMPACT
+ && GET_MODE_SIZE (mode) != 4
+ && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
+ last_float = 0;
+ lab = add_constant (src, mode, last_float);
+ if (lab)
+ emit_insn_before (gen_mova (lab), scan);
+ else
+ {
+ /* There will be a REG_UNUSED note for r0 on
+ LAST_FLOAT_MOVE; we have to change it to REG_INC;
+ otherwise reorg:mark_target_live_regs will not
+ consider r0 to be used, and we could end up with a delay
+ slot insn in front of SCAN that clobbers r0. */
+ rtx note
+ = find_regno_note (last_float_move, REG_UNUSED, 0);
+
+ /* If we are not optimizing, then there may not be
+ a note. */
+ if (note)
+ PUT_REG_NOTE_KIND (note, REG_INC);
+
+ *last_float_addr = r0_inc_rtx;
+ }
+ last_float_move = scan;
+ last_float = src;
+ newsrc = gen_const_mem (mode,
+ (((TARGET_SH4 && ! TARGET_FMOVD)
+ || REGNO (dst) == FPUL_REG)
+ ? r0_inc_rtx
+ : r0_rtx));
+ last_float_addr = &XEXP (newsrc, 0);
+
+ /* Remove the clobber of r0. */
+ *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
+ gen_rtx_SCRATCH (Pmode));
+ }
+ /* This is a mova needing a label. Create it. */
+ else if (GET_CODE (src) == UNSPEC
+ && XINT (src, 1) == UNSPEC_MOVA
+ && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
+ {
+ lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
+ newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
+ newsrc = gen_rtx_UNSPEC (SImode,
+ gen_rtvec (1, newsrc),
+ UNSPEC_MOVA);
+ }
+ else if (GET_CODE (src) == UNSPEC_VOLATILE
+ && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
+ {
+ newsrc = XVECEXP (src, 0, 0);
+ XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
+ INSN_CODE (scan) = -1;
+ continue;
+ }
+ else
+ {
+ lab = add_constant (src, mode, 0);
+ newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
+ newsrc = gen_const_mem (mode, newsrc);
+ }
+ *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
+ INSN_CODE (scan) = -1;
+ }
+ }
+ dump_table (need_aligned_label ? insn : 0, barrier);
+ insn = barrier;
+ }
+ }
+ free_alloc_pool (label_ref_list_pool);
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ PUT_MODE (insn, VOIDmode);
+
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
+ INSN_ADDRESSES_FREE ();
+ split_branches (first);
+
+ /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+ also has an effect on the register that holds the address of the sfunc.
+ Insert an extra dummy insn in front of each sfunc that pretends to
+ use this register. */
+ if (flag_delayed_branch)
+ {
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx reg = sfunc_uses_reg (insn);
+
+ if (! reg)
+ continue;
+ emit_insn_before (gen_use_sfunc_addr (reg), insn);
+ }
+ }
+#if 0
+ /* fpscr is not actually a user variable, but we pretend it is for the
+ sake of the previous optimization passes, since we want it handled like
+ one. However, we don't have any debugging information for it, so turn
+ it into a non-user variable now. */
+ if (TARGET_SH4)
+ REG_USERVAR_P (get_fpscr_rtx ()) = 0;
+#endif
+ mdep_reorg_phase = SH_AFTER_MDEP_REORG;
+}
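+
+/* Informal recap of the pass ordering above: sh_reorg first tags call sites
+   for linker relaxation (SH_INSERT_USES_LABELS), then measures branch
+   lengths (SH_SHORTEN_BRANCHES0), rewrites out-of-range constant moves into
+   pc-relative loads and dumps the literal tables behind barriers
+   (SH_FIXUP_PCLOAD), and finally re-measures and splits out-of-range
+   branches (SH_SHORTEN_BRANCHES1 via split_branches).  */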
+
+/* Return the UID of the insn that follows the specified label. */
+int
+get_dest_uid (rtx label, int max_uid)
+{
+ rtx dest = next_real_insn (label);
+ int dest_uid;
+ if (! dest)
+ /* This can happen for an undefined label. */
+ return 0;
+ dest_uid = INSN_UID (dest);
+ /* If this is a newly created branch redirection blocking instruction,
+ we cannot index the branch_uid or insn_addresses arrays with its
+ uid. But then, we won't need to, because the actual destination is
+ the following branch. */
+ while (dest_uid >= max_uid)
+ {
+ dest = NEXT_INSN (dest);
+ dest_uid = INSN_UID (dest);
+ }
+ if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
+ return 0;
+ return dest_uid;
+}
+
+/* Split condbranches that are out of range. Also add clobbers for
+ scratch registers that are needed in far jumps.
+ We do this before delay slot scheduling, so that it can take our
+ newly created instructions into account. It also allows us to
+ find branches with common targets more easily. */
+static void
+split_branches (rtx first)
+{
+ rtx insn;
+ struct far_branch **uid_branch, *far_branch_list = 0;
+ int max_uid = get_max_uid ();
+ int ok;
+
+ /* Find out which branches are out of range. */
+ shorten_branches (first);
+
+ uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
+ memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ if (! INSN_P (insn))
+ continue;
+ else if (INSN_DELETED_P (insn))
+ {
+ /* Shorten_branches would split this instruction again,
+ so transform it into a note. */
+ SET_INSN_DELETED (insn);
+ }
+ else if (JUMP_P (insn))
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_CBRANCH)
+ {
+ rtx next, beyond;
+
+ if (get_attr_length (insn) > 4)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ rtx olabel = XEXP (XEXP (src, 1), 0);
+ int addr = INSN_ADDRESSES (INSN_UID (insn));
+ rtx label = 0;
+ int dest_uid = get_dest_uid (olabel, max_uid);
+ struct far_branch *bp = uid_branch[dest_uid];
+
+ /* redirect_jump needs a valid JUMP_LABEL, and it might delete
+ the label if the LABEL_NUSES count drops to zero. There is
+ always a jump_optimize pass that sets these values, but it
+ proceeds to delete unreferenced code, and then if not
+ optimizing, to un-delete the deleted instructions, thus
+ leaving labels whose use counts are too low. */
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = olabel;
+ LABEL_NUSES (olabel)++;
+ }
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->far_label
+ = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ {
+ label = bp->near_label;
+ if (! label && bp->address - addr >= CONDJUMP_MIN)
+ {
+ rtx block = bp->insert_place;
+
+ if (GET_CODE (PATTERN (block)) == RETURN)
+ block = PREV_INSN (block);
+ else
+ block = gen_block_redirect (block,
+ bp->address, 2);
+ label = emit_label_after (gen_label_rtx (),
+ PREV_INSN (block));
+ bp->near_label = label;
+ }
+ else if (label && ! NEXT_INSN (label))
+ {
+ if (addr + 2 - bp->address <= CONDJUMP_MAX)
+ bp->insert_place = insn;
+ else
+ gen_far_branch (bp);
+ }
+ }
+ if (! label
+ || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
+ {
+ bp->near_label = label = gen_label_rtx ();
+ bp->insert_place = insn;
+ bp->address = addr;
+ }
+ ok = redirect_jump (insn, label, 0);
+ gcc_assert (ok);
+ }
+ else
+ {
+ /* get_attr_length (insn) == 2 */
+ /* Check if we have a pattern where reorg wants to redirect
+ the branch to a label from an unconditional branch that
+ is too far away. */
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ /* A syntax error might cause beyond to be NULL_RTX. */
+ beyond
+ = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
+ 0));
+
+ if (beyond
+ && (JUMP_P (beyond)
+ || ((beyond = next_active_insn (beyond))
+ && JUMP_P (beyond)))
+ && GET_CODE (PATTERN (beyond)) == SET
+ && recog_memoized (beyond) == CODE_FOR_jump_compact
+ && ((INSN_ADDRESSES
+ (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
+ - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
+ > 252 + 258 + 2))
+ gen_block_redirect (beyond,
+ INSN_ADDRESSES (INSN_UID (beyond)), 1);
+ }
+
+ next = next_active_insn (insn);
+
+ if (next
+ && (JUMP_P (next)
+ || ((next = next_active_insn (next))
+ && JUMP_P (next)))
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump_compact
+ && ((INSN_ADDRESSES
+ (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
+ - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
+ > 252 + 258 + 2))
+ gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
+ }
+ else if (type == TYPE_JUMP || type == TYPE_RETURN)
+ {
+ int addr = INSN_ADDRESSES (INSN_UID (insn));
+ rtx far_label = 0;
+ int dest_uid = 0;
+ struct far_branch *bp;
+
+ if (type == TYPE_JUMP)
+ {
+ far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
+ dest_uid = get_dest_uid (far_label, max_uid);
+ if (! dest_uid)
+ {
+ /* Parse errors can lead to labels outside
+ the insn stream. */
+ if (! NEXT_INSN (far_label))
+ continue;
+
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = far_label;
+ LABEL_NUSES (far_label)++;
+ }
+ redirect_jump (insn, ret_rtx, 1);
+ far_label = 0;
+ }
+ }
+ bp = uid_branch[dest_uid];
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->near_label = 0;
+ bp->far_label = far_label;
+ if (far_label)
+ LABEL_NUSES (far_label)++;
+ }
+ else if (bp->near_label && ! NEXT_INSN (bp->near_label))
+ if (addr - bp->address <= CONDJUMP_MAX)
+ emit_label_after (bp->near_label, PREV_INSN (insn));
+ else
+ {
+ gen_far_branch (bp);
+ bp->near_label = 0;
+ }
+ else
+ bp->near_label = 0;
+ bp->address = addr;
+ bp->insert_place = insn;
+ if (! far_label)
+ emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
+ else
+ gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
+ }
+ }
+ /* Generate all pending far branches,
+ and free our references to the far labels. */
+ while (far_branch_list)
+ {
+ if (far_branch_list->near_label
+ && ! NEXT_INSN (far_branch_list->near_label))
+ gen_far_branch (far_branch_list);
+ if (optimize
+ && far_branch_list->far_label
+ && ! --LABEL_NUSES (far_branch_list->far_label))
+ delete_insn (far_branch_list->far_label);
+ far_branch_list = far_branch_list->prev;
+ }
+
+ /* Instruction length information is no longer valid due to the new
+ instructions that have been generated. */
+ init_insn_lengths ();
+}
+
+/* Dump out instruction addresses, which is useful for debugging the
+ constant pool table stuff.
+
+ If relaxing, output the label and pseudo-ops used to link together
+ calls and the instruction which set the registers.
+
+ ??? The addresses printed by this routine for insns are nonsense for
+ insns which are inside of a sequence where none of the inner insns have
+ variable length. This is because the second pass of shorten_branches
+ does not bother to update them. */
+void
+final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ if (TARGET_DUMPISIZE)
+ fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
+
+ if (TARGET_RELAX)
+ {
+ rtx note;
+
+ note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
+ if (note)
+ {
+ rtx pattern;
+
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ switch (GET_CODE (pattern))
+ {
+ case SET:
+ if (GET_CODE (SET_SRC (pattern)) != CALL
+ && get_attr_type (insn) != TYPE_SFUNC)
+ {
+ targetm.asm_out.internal_label
+ (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
+ break;
+ }
+ /* else FALLTHROUGH */
+ case CALL:
+ asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
+ CODE_LABEL_NUMBER (XEXP (note, 0)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+}
+
+/* Dump out any constants accumulated in the final pass. These will
+ only be labels. */
+const char *
+output_jump_label_table (void)
+{
+ int i;
+
+ if (pool_size)
+ {
+ fprintf (asm_out_file, "\t.align 2\n");
+ for (i = 0; i < pool_size; i++)
+ {
+ pool_node *p = &pool_vector[i];
+
+ (*targetm.asm_out.internal_label) (asm_out_file, "L",
+ CODE_LABEL_NUMBER (p->label));
+ output_asm_insn (".long %O0", &p->value);
+ }
+ pool_size = 0;
+ }
+
+ return "";
+}
+
+/* A full frame looks like:
+
+ arg-5
+ arg-4
+ [ if current_function_anonymous_args
+ arg-3
+ arg-2
+ arg-1
+ arg-0 ]
+ saved-fp
+ saved-r10
+ saved-r11
+ saved-r12
+ saved-pr
+ local-n
+ ..
+ local-1
+ local-0 <- fp points here.
+
+ Number of bytes pushed for anonymous args, used to pass information
+ between expand_prologue and expand_epilogue.
+
+ Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
+ adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
+ for an epilogue and a negative value means that it's for a sibcall
+ epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
+ all the registers that are about to be restored, and hence dead. */
+static void
+output_stack_adjust (int size, rtx reg, int epilogue_p,
+ HARD_REG_SET *live_regs_mask, bool frame_p)
+{
+ rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
+ if (size)
+ {
+ HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
+
+/* This test is bogus, as output_stack_adjust is used to re-align the
+ stack. */
+#if 0
+ gcc_assert (!(size % align));
+#endif
+
+ if (CONST_OK_FOR_ADD (size))
+ emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
+ /* Try to do it with two partial adjustments; however, we must make
+ sure that the stack is properly aligned at all times, in case
+ an interrupt occurs between the two partial adjustments. */
+ else if (CONST_OK_FOR_ADD (size / 2 & -align)
+ && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
+ {
+ emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
+ emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
+ }
+ else
+ {
+ rtx const_reg;
+ rtx insn;
+ int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
+ int i;
+
+ /* If TEMP is invalid, we could temporarily save a general
+ register to MACL. However, there is currently no need
+ to handle this case, so just die when we see it. */
+ if (epilogue_p < 0
+ || current_function_interrupt
+ || ! call_really_used_regs[temp] || fixed_regs[temp])
+ temp = -1;
+ if (temp < 0 && ! current_function_interrupt
+ && (TARGET_SHMEDIA || epilogue_p >= 0))
+ {
+ HARD_REG_SET temps;
+ COPY_HARD_REG_SET (temps, call_used_reg_set);
+ AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
+ if (epilogue_p > 0)
+ {
+ int nreg = 0;
+ if (crtl->return_rtx)
+ {
+ enum machine_mode mode;
+ mode = GET_MODE (crtl->return_rtx);
+ if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
+ nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
+ }
+ for (i = 0; i < nreg; i++)
+ CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
+ if (crtl->calls_eh_return)
+ {
+ CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
+ for (i = 0; i <= 3; i++)
+ CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
+ }
+ }
+ if (TARGET_SHMEDIA && epilogue_p < 0)
+ for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+ CLEAR_HARD_REG_BIT (temps, i);
+ if (epilogue_p <= 0)
+ {
+ for (i = FIRST_PARM_REG;
+ i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
+ CLEAR_HARD_REG_BIT (temps, i);
+ if (cfun->static_chain_decl != NULL)
+ CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
+ }
+ temp = scavenge_reg (&temps);
+ }
+ if (temp < 0 && live_regs_mask)
+ {
+ HARD_REG_SET temps;
+
+ COPY_HARD_REG_SET (temps, *live_regs_mask);
+ CLEAR_HARD_REG_BIT (temps, REGNO (reg));
+ temp = scavenge_reg (&temps);
+ }
+ if (temp < 0)
+ {
+ rtx adj_reg, tmp_reg, mem;
+
+ /* If we reached here, the most likely case is the (sibcall)
+ epilogue for non-SHmedia. Put a special push/pop sequence
+ for such a case as a last resort. This looks lengthy, but it
+ should not be a problem because it seems to be very
+ rare. */
+
+ gcc_assert (!TARGET_SHMEDIA && epilogue_p);
+
+
+ /* ??? There is still the slight possibility that r4 or
+ r5 have been reserved as fixed registers or assigned
+ as global registers, and they change during an
+ interrupt. There are possible ways to handle this:
+
+ - If we are adjusting the frame pointer (r14), we can do
+ with a single temp register and an ordinary push / pop
+ on the stack.
+ - Grab any call-used or call-saved registers (i.e. not
+ fixed or globals) for the temps we need. We might
+ also grab r14 if we are adjusting the stack pointer.
+ If we can't find enough available registers, issue
+ a diagnostic and die - the user must have reserved
+ way too many registers.
+ But since all this is rather unlikely to happen and
+ would require extra testing, we just die if r4 / r5
+ are not available. */
+ gcc_assert (!fixed_regs[4] && !fixed_regs[5]
+ && !global_regs[4] && !global_regs[5]);
+
+ adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
+ tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
+ emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
+ emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
+ emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
+ emit_move_insn (mem, tmp_reg);
+ emit_move_insn (reg, adj_reg);
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (adj_reg, mem);
+ mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
+ emit_move_insn (tmp_reg, mem);
+ /* Tell flow the insns that pop r4/r5 aren't dead. */
+ emit_use (tmp_reg);
+ emit_use (adj_reg);
+ return;
+ }
+ const_reg = gen_rtx_REG (GET_MODE (reg), temp);
+
+ /* If SIZE is negative, subtract the positive value.
+ This sometimes allows a constant pool entry to be shared
+ between prologue and epilogue code. */
+ if (size < 0)
+ {
+ emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
+ insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
+ }
+ else
+ {
+ emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
+ insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
+ }
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_PLUS (SImode, reg,
+ GEN_INT (size))));
+ }
+ }
+}
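+
+/* Worked example for the two-partial-adjustments path above (informal, and
+   assuming CONST_OK_FOR_ADD only accepts small signed immediates): with
+   size == 200 and an 8-byte STACK_BOUNDARY, size / 2 & -align is 96, so the
+   adjustment is emitted as +96 followed by +104.  Each step fits in an
+   immediate add, and the intermediate stack pointer stays a multiple of the
+   alignment, which is what keeps the stack safe if an interrupt arrives
+   between the two adds.  */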
+
+/* Emit the specified insn and mark it as frame related.
+ FIXME: Rename this to emit_frame_insn. */
+static rtx
+frame_insn (rtx x)
+{
+ x = emit_insn (x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Output RTL to push register RN onto the stack. */
+static rtx
+push (int rn)
+{
+ rtx x;
+ if (rn == FPUL_REG)
+ x = gen_push_fpul ();
+ else if (rn == FPSCR_REG)
+ x = gen_push_fpscr ();
+ else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
+ && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
+ {
+ if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
+ return NULL_RTX;
+ x = gen_push_4 (gen_rtx_REG (DFmode, rn));
+ }
+ else if (TARGET_SH2E && FP_REGISTER_P (rn))
+ x = gen_push_e (gen_rtx_REG (SFmode, rn));
+ else
+ x = gen_push (gen_rtx_REG (SImode, rn));
+
+ x = frame_insn (x);
+ add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
+ return x;
+}
+
+/* Output RTL to pop register RN from the stack. */
+static void
+pop (int rn)
+{
+ rtx x, sp_reg, reg;
+ if (rn == FPUL_REG)
+ x = gen_pop_fpul ();
+ else if (rn == FPSCR_REG)
+ x = gen_pop_fpscr ();
+ else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
+ && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
+ {
+ if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
+ return;
+ x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
+ }
+ else if (TARGET_SH2E && FP_REGISTER_P (rn))
+ x = gen_pop_e (gen_rtx_REG (SFmode, rn));
+ else
+ x = gen_pop (gen_rtx_REG (SImode, rn));
+
+ x = emit_insn (x);
+
+ sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+ reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
+ ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
+ : SET_DEST (PATTERN (x)));
+ add_reg_note (x, REG_CFA_RESTORE, reg);
+ add_reg_note (x, REG_CFA_ADJUST_CFA,
+ gen_rtx_SET (SImode, sp_reg,
+ plus_constant (SImode, sp_reg,
+ GET_MODE_SIZE (GET_MODE (reg)))));
+ add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
+ RTX_FRAME_RELATED_P (x) = 1;
+}
+
+/* Generate code to push the regs specified in the mask. */
+static void
+push_regs (HARD_REG_SET *mask, int interrupt_handler)
+{
+ int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
+ int skip_fpscr = 0;
+
+ /* Push PR last; this gives better latencies after the prologue, and
+ provides candidates for the return delay slot when no general
+ registers are pushed. */
+ for (; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ /* If this is an interrupt handler, and the SZ bit varies,
+ and we have to push any floating point register, we need
+ to switch to the correct precision first. */
+ if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
+ && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
+ {
+ HARD_REG_SET unsaved;
+
+ push (FPSCR_REG);
+ COMPL_HARD_REG_SET (unsaved, *mask);
+ fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
+ skip_fpscr = 1;
+ }
+ if (i != PR_REG
+ && (i != FPSCR_REG || ! skip_fpscr)
+ && TEST_HARD_REG_BIT (*mask, i))
+ {
+ /* If the ISR has the RESBANK attribute assigned, don't push any of
+ the following registers: R0-R14, MACH, MACL and GBR. */
+ if (! (sh_cfun_resbank_handler_p ()
+ && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
+ || i == MACH_REG
+ || i == MACL_REG
+ || i == GBR_REG)))
+ push (i);
+ }
+ }
+
+ /* Push banked registers last to improve delay slot opportunities. */
+ if (interrupt_handler)
+ {
+ bool use_movml = false;
+
+ if (TARGET_SH2A)
+ {
+ unsigned int count = 0;
+
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ if (TEST_HARD_REG_BIT (*mask, i))
+ count++;
+ else
+ break;
+
+ /* Use movml when all banked registers are pushed. */
+ if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+ use_movml = true;
+ }
+
+ if (sh_cfun_resbank_handler_p ())
+ ; /* Do nothing. */
+ else if (use_movml)
+ {
+ rtx x, mem, reg, set;
+ rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+ /* We must avoid scheduling the multiple-store insn together
+ with other insns. */
+ emit_insn (gen_blockage ());
+ x = gen_movml_push_banked (sp_reg);
+ x = frame_insn (x);
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ {
+ mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
+ reg = gen_rtx_REG (SImode, i);
+ add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
+ }
+
+ set = gen_rtx_SET (SImode, sp_reg,
+ plus_constant (Pmode, sp_reg, - 32));
+ add_reg_note (x, REG_CFA_ADJUST_CFA, set);
+ emit_insn (gen_blockage ());
+ }
+ else
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ if (TEST_HARD_REG_BIT (*mask, i))
+ push (i);
+ }
+
+ /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
+ if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
+ push (PR_REG);
+}
+
+/* Calculate how much extra space is needed to save all callee-saved
+ target registers.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+static int
+shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
+{
+ int reg;
+ int stack_space = 0;
+ int interrupt_handler = sh_cfun_interrupt_handler_p ();
+
+ for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
+ if ((! call_really_used_regs[reg] || interrupt_handler)
+ && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
+ /* Leave space to save this target register on the stack,
+ in case target register allocation wants to use it. */
+ stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+ return stack_space;
+}
+
+/* Decide whether we should reserve space for callee-save target registers,
+ in case target register allocation wants to use them. REGS_SAVED is
+ the space, in bytes, that is already required for register saves.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+static int
+shmedia_reserve_space_for_target_registers_p (int regs_saved,
+ HARD_REG_SET *live_regs_mask)
+{
+ if (optimize_size)
+ return 0;
+ return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
+}
+
+/* Decide how much space to reserve for callee-save target registers
+ in case target register allocation wants to use them.
+ LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
+static int
+shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
+{
+ if (shmedia_space_reserved_for_target_registers)
+ return shmedia_target_regs_stack_space (live_regs_mask);
+ else
+ return 0;
+}
+
+/* Work out the registers which need to be saved, both as a mask and a
+ count of saved words. Return the count.
+
+ If doing a pragma interrupt function, then push all regs used by the
+ function, and if we call another function (we can tell by looking at PR),
+ make sure that all the regs it clobbers are safe too. */
+static int
+calc_live_regs (HARD_REG_SET *live_regs_mask)
+{
+ unsigned int reg;
+ int count;
+ tree attrs;
+ bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
+ bool nosave_low_regs;
+ int pr_live, has_call;
+
+ attrs = DECL_ATTRIBUTES (current_function_decl);
+ interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
+ trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
+ interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
+ nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
+
+ CLEAR_HARD_REG_SET (*live_regs_mask);
+ if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
+ && df_regs_ever_live_p (FPSCR_REG))
+ target_flags &= ~MASK_FPU_SINGLE;
+ /* If switching to double mode avoids a lot of register saves, do that. */
+ else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
+ && TARGET_FPU_SINGLE)
+ for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
+ if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
+ && (! call_really_used_regs[reg]
+ || interrupt_handler)
+ && ++count > 2)
+ {
+ target_flags &= ~MASK_FPU_SINGLE;
+ break;
+ }
+ /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
+ knows how to use it. That means the pseudo originally allocated for
+ the initial value can become the PR_MEDIA_REG hard register, as seen for
+ execute/20010122-1.c:test9. */
+ if (TARGET_SHMEDIA)
+ /* ??? this function is called from initial_elimination_offset, hence we
+ can't use the result of sh_media_register_for_return here. */
+ pr_live = sh_pr_n_sets ();
+ else
+ {
+ rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
+ pr_live = (pr_initial
+ ? (!REG_P (pr_initial)
+ || REGNO (pr_initial) != (PR_REG))
+ : df_regs_ever_live_p (PR_REG));
+ /* For SHcompact, if not optimizing, we end up with a memory reference
+ using the return address pointer for __builtin_return_address even
+ though there is no actual need to put the PR register on the stack. */
+ pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
+ }
+ /* Force PR to be live if the prologue has to call the SHmedia
+ argument decoder or register saver. */
+ if (TARGET_SHCOMPACT
+ && ((crtl->args.info.call_cookie
+ & ~ CALL_COOKIE_RET_TRAMP (1))
+ || crtl->saves_all_registers))
+ pr_live = 1;
+ has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
+ for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
+ {
+ if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
+ ? pr_live
+ : interrupt_handler
+ ? (/* Need to save all the regs ever live. */
+ (df_regs_ever_live_p (reg)
+ || (call_really_used_regs[reg]
+ && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
+ || reg == PIC_OFFSET_TABLE_REGNUM)
+ && has_call)
+ || (TARGET_SHMEDIA && has_call
+ && REGISTER_NATURAL_MODE (reg) == SImode
+ && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
+ && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
+ && reg != RETURN_ADDRESS_POINTER_REGNUM
+ && reg != T_REG && reg != GBR_REG
+ /* Push fpscr only on targets which have an FPU. */
+ && (reg != FPSCR_REG || TARGET_FPU_ANY))
+ : (/* Only push those regs which are used and need to be saved. */
+ (TARGET_SHCOMPACT
+ && flag_pic
+ && crtl->args.info.call_cookie
+ && reg == PIC_OFFSET_TABLE_REGNUM)
+ || (df_regs_ever_live_p (reg)
+ && ((!call_really_used_regs[reg]
+ && !(reg != PIC_OFFSET_TABLE_REGNUM
+ && fixed_regs[reg] && call_used_regs[reg]))
+ || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
+ || (crtl->calls_eh_return
+ && (reg == EH_RETURN_DATA_REGNO (0)
+ || reg == EH_RETURN_DATA_REGNO (1)
+ || reg == EH_RETURN_DATA_REGNO (2)
+ || reg == EH_RETURN_DATA_REGNO (3)))
+ || ((reg == MACL_REG || reg == MACH_REG)
+ && df_regs_ever_live_p (reg)
+ && sh_cfun_attr_renesas_p ())
+ ))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, reg);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+
+ if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
+ && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
+ {
+ if (FP_REGISTER_P (reg))
+ {
+ if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
+ }
+ }
+ else if (XD_REGISTER_P (reg))
+ {
+ /* Must switch to double mode to access these registers. */
+ target_flags &= ~MASK_FPU_SINGLE;
+ }
+ }
+ }
+ if (nosave_low_regs && reg == R8_REG)
+ break;
+ }
+ /* If we have a target register optimization pass after prologue / epilogue
+ threading, we need to assume all target registers will be live even if
+ they aren't now. */
+ if (flag_branch_target_load_optimize2
+ && TARGET_SAVE_ALL_TARGET_REGS
+ && shmedia_space_reserved_for_target_registers)
+ for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
+ if ((! call_really_used_regs[reg] || interrupt_handler)
+ && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, reg);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
+ }
+ /* If this is an interrupt handler, we don't have any call-clobbered
+ registers we can conveniently use for target register save/restore.
+ Make sure we save at least one general purpose register when we need
+ to save target registers. */
+ if (interrupt_handler
+ && hard_reg_set_intersect_p (*live_regs_mask,
+ reg_class_contents[TARGET_REGS])
+ && ! hard_reg_set_intersect_p (*live_regs_mask,
+ reg_class_contents[GENERAL_REGS]))
+ {
+ SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
+ count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
+ }
+
+ return count;
+}
+
+/* Code to generate prologue and epilogue sequences. */
+
+/* PUSHED is the number of bytes that are being pushed on the
+ stack for register saves. Return the frame size, padded
+ appropriately so that the stack stays properly aligned. */
+static HOST_WIDE_INT
+rounded_frame_size (int pushed)
+{
+ HOST_WIDE_INT size = get_frame_size ();
+ HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
+
+ if (ACCUMULATE_OUTGOING_ARGS)
+ size += crtl->outgoing_args_size;
+
+ return ((size + pushed + align - 1) & -align) - pushed;
+}
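+
+/* Illustrative arithmetic for the rounding above: assuming an 8-byte
+   STACK_BOUNDARY, get_frame_size () == 22 and pushed == 12 give
+   ((22 + 12 + 8 - 1) & -8) - 12 == 28, i.e. the local frame is padded from
+   22 to 28 bytes so that the total of pushed registers plus frame (40)
+   remains a multiple of the stack alignment.  */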
+
+/* Choose a call-clobbered target-branch register that remains
+ unchanged along the whole function. We set it up as the return
+ value in the prologue. */
+int
+sh_media_register_for_return (void)
+{
+ int regno;
+ int tr0_used;
+
+ if (! crtl->is_leaf)
+ return -1;
+ if (lookup_attribute ("interrupt_handler",
+ DECL_ATTRIBUTES (current_function_decl)))
+ return -1;
+ if (sh_cfun_interrupt_handler_p ())
+ return -1;
+
+ tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
+
+ for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
+ if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
+ return regno;
+
+ return -1;
+}
+
+/* The maximum registers we need to save are:
+ - 62 general purpose registers (r15 is stack pointer, r63 is zero)
+ - 32 floating point registers (for each pair, we save none,
+ one single precision value, or a double precision value).
+ - 8 target registers
+ - add 1 entry for a delimiter. */
+#define MAX_SAVED_REGS (62+32+8)
+
+typedef struct save_entry_s
+{
+ unsigned char reg;
+ unsigned char mode;
+ short offset;
+} save_entry;
+
+#define MAX_TEMPS 4
+
+/* There will be a delimiter entry with VOIDmode both at the start and the
+ end of a filled-in schedule. The end delimiter has the offset of the
+ save with the smallest (i.e. most negative) offset. */
+typedef struct save_schedule_s
+{
+ save_entry entries[MAX_SAVED_REGS + 2];
+ int temps[MAX_TEMPS+1];
+} save_schedule;
+
+/* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
+ use reverse order. Returns the last entry written to (not counting
+ the delimiter). OFFSET_BASE is a number to be added to all offset
+ entries. */
+static save_entry *
+sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
+ int offset_base)
+{
+ int align, i;
+ save_entry *entry = schedule->entries;
+ int tmpx = 0;
+ int offset;
+
+ if (! current_function_interrupt)
+ for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
+ if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
+ && ! FUNCTION_ARG_REGNO_P (i)
+ && i != FIRST_RET_REG
+ && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
+ && ! (crtl->calls_eh_return
+ && (i == EH_RETURN_STACKADJ_REGNO
+ || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
+ && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
+ schedule->temps[tmpx++] = i;
+ entry->reg = -1;
+ entry->mode = VOIDmode;
+ entry->offset = offset_base;
+ entry++;
+ /* We loop twice: first, we save 8-byte aligned registers at the
+ higher addresses, which are known to be aligned. Then we
+ proceed to saving 32-bit registers that don't need 8-byte
+ alignment.
+ If this is an interrupt function, all registers that need saving
+ need to be saved in full. Moreover, we need to postpone saving
+ target registers until we have saved some general purpose registers
+ that we can then use as scratch registers. */
+ offset = offset_base;
+ for (align = 1; align >= 0; align--)
+ {
+ for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
+ if (TEST_HARD_REG_BIT (*live_regs_mask, i))
+ {
+ enum machine_mode mode = REGISTER_NATURAL_MODE (i);
+ int reg = i;
+
+ if (current_function_interrupt)
+ {
+ if (TARGET_REGISTER_P (i))
+ continue;
+ if (GENERAL_REGISTER_P (i))
+ mode = DImode;
+ }
+ if (mode == SFmode && (i % 2) == 1
+ && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
+ && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
+ {
+ mode = DFmode;
+ i--;
+ reg--;
+ }
+
+ /* If we're doing the aligned pass and this is not aligned,
+ or we're doing the unaligned pass and this is aligned,
+ skip it. */
+ if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
+ != align)
+ continue;
+
+ if (current_function_interrupt
+ && GENERAL_REGISTER_P (i)
+ && tmpx < MAX_TEMPS)
+ schedule->temps[tmpx++] = i;
+
+ offset -= GET_MODE_SIZE (mode);
+ entry->reg = i;
+ entry->mode = mode;
+ entry->offset = offset;
+ entry++;
+ }
+ if (align && current_function_interrupt)
+ for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
+ if (TEST_HARD_REG_BIT (*live_regs_mask, i))
+ {
+ offset -= GET_MODE_SIZE (DImode);
+ entry->reg = i;
+ entry->mode = DImode;
+ entry->offset = offset;
+ entry++;
+ }
+ }
+ entry->reg = -1;
+ entry->mode = VOIDmode;
+ entry->offset = offset;
+ schedule->temps[tmpx] = -1;
+ return entry - 1;
+}
+
+/* Expand code for the function prologue. */
+void
+sh_expand_prologue (void)
+{
+ HARD_REG_SET live_regs_mask;
+ int d, i;
+ int d_rounding = 0;
+ int save_flags = target_flags;
+ int pretend_args;
+ int stack_usage;
+ tree sp_switch_attr
+ = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
+
+ current_function_interrupt = sh_cfun_interrupt_handler_p ();
+
+ /* We have pretend args if we had an object sent partially in registers
+ and partially on the stack, e.g. a large structure. */
+ pretend_args = crtl->args.pretend_args_size;
+ if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
+ && (NPARM_REGS(SImode)
+ > crtl->args.info.arg_count[(int) SH_ARG_INT]))
+ pretend_args = 0;
+
+ output_stack_adjust (-pretend_args
+ - crtl->args.info.stack_regs * 8,
+ stack_pointer_rtx, 0, NULL, true);
+ stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
+
+ if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
+ /* We're going to use the PIC register to load the address of the
+ incoming-argument decoder and/or of the return trampoline from
+ the GOT, so make sure the PIC register is preserved and
+ initialized. */
+ df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
+
+ if (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
+ {
+ int reg;
+
+ /* First, make all registers with incoming arguments that will
+ be pushed onto the stack live, so that register renaming
+ doesn't overwrite them. */
+ for (reg = 0; reg < NPARM_REGS (SImode); reg++)
+ if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
+ >= NPARM_REGS (SImode) - reg)
+ for (; reg < NPARM_REGS (SImode); reg++)
+ emit_insn (gen_shcompact_preserve_incoming_args
+ (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
+ else if (CALL_COOKIE_INT_REG_GET
+ (crtl->args.info.call_cookie, reg) == 1)
+ emit_insn (gen_shcompact_preserve_incoming_args
+ (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
+
+ emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
+ stack_pointer_rtx);
+ emit_move_insn (gen_rtx_REG (SImode, R0_REG),
+ GEN_INT (crtl->args.info.call_cookie));
+ emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
+ gen_rtx_REG (SImode, R0_REG));
+ }
+ else if (TARGET_SHMEDIA)
+ {
+ int tr = sh_media_register_for_return ();
+
+ if (tr >= 0)
+ emit_move_insn (gen_rtx_REG (DImode, tr),
+ gen_rtx_REG (DImode, PR_MEDIA_REG));
+ }
+
+ /* Emit the code for SETUP_VARARGS. */
+ if (cfun->stdarg)
+ {
+ if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
+ {
+ /* Push arg regs as if they'd been provided by the caller on the stack. */
+ for (i = 0; i < NPARM_REGS(SImode); i++)
+ {
+ int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
+
+ if (i >= (NPARM_REGS(SImode)
+ - crtl->args.info.arg_count[(int) SH_ARG_INT]
+ ))
+ break;
+ push (rn);
+ stack_usage += GET_MODE_SIZE (SImode);
+ }
+ }
+ }
+
+ /* If we're supposed to switch stacks at function entry, do so now. */
+ if (sp_switch_attr)
+ {
+ rtx lab, newsrc;
+ /* The argument specifies a variable holding the address of the
+ stack the interrupt function should switch to/from at entry/exit. */
+ tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
+ const char *s
+ = ggc_strdup (TREE_STRING_POINTER (arg));
+ rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
+
+ lab = add_constant (sp_switch, SImode, 0);
+ newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
+
+ emit_insn (gen_sp_switch_1 (newsrc));
+ }
+
+ d = calc_live_regs (&live_regs_mask);
+ /* ??? Maybe we could save some switching if we can move a mode switch
+ that already happens to be at the function start into the prologue. */
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+
+ if (TARGET_SH5)
+ {
+ int offset_base, offset;
+ rtx r0 = NULL_RTX;
+ int offset_in_r0 = -1;
+ int sp_in_r0 = 0;
+ int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
+ int total_size, save_size;
+ save_schedule schedule;
+ save_entry *entry;
+ int *tmp_pnt;
+
+ if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
+ && ! current_function_interrupt)
+ r0 = gen_rtx_REG (Pmode, R0_REG);
+
+ /* D is the actual number of bytes that we need for saving registers;
+ however, in initial_elimination_offset we have committed to using
+ an additional TREGS_SPACE amount of bytes - in order to keep both
+ addresses to arguments supplied by the caller and local variables
+ valid, we must keep this gap. Place it between the incoming
+ arguments and the actually saved registers in a bid to optimize
+ locality of reference. */
+ total_size = d + tregs_space;
+ total_size += rounded_frame_size (total_size);
+ save_size = total_size - rounded_frame_size (d);
+ if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
+ d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
+ - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
+
+ /* If adjusting the stack in a single step costs nothing extra, do so.
+ I.e. either if a single addi is enough, or we need a movi anyway,
+ and we don't exceed the maximum offset range (the test for the
+ latter is conservative for simplicity). */
+ if (TARGET_SHMEDIA
+ && (CONST_OK_FOR_I10 (-total_size)
+ || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
+ && total_size <= 2044)))
+ d_rounding = total_size - save_size;
+
+ offset_base = d + d_rounding;
+
+ output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
+ 0, NULL, true);
+ stack_usage += save_size + d_rounding;
+
+ sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
+ tmp_pnt = schedule.temps;
+ for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
+ {
+ enum machine_mode mode = (enum machine_mode) entry->mode;
+ unsigned int reg = entry->reg;
+ rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
+ rtx orig_reg_rtx;
+
+ offset = entry->offset;
+
+ reg_rtx = gen_rtx_REG (mode, reg);
+
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+
+ if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
+ {
+ gcc_assert (r0);
+ mem_rtx = NULL_RTX;
+ }
+
+ if (HAVE_PRE_DECREMENT
+ && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
+ || mem_rtx == NULL_RTX
+ || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
+ {
+ pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
+
+ if (!memory_address_p (mode, XEXP (pre_dec, 0)))
+ pre_dec = NULL_RTX;
+ else
+ {
+ mem_rtx = NULL_RTX;
+ offset += GET_MODE_SIZE (mode);
+ }
+ }
+
+ if (mem_rtx != NULL_RTX)
+ goto addr_ok;
+
+ if (offset_in_r0 == -1)
+ {
+ emit_move_insn (r0, GEN_INT (offset));
+ offset_in_r0 = offset;
+ }
+ else if (offset != offset_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0,
+ GEN_INT (offset - offset_in_r0)));
+ offset_in_r0 += offset - offset_in_r0;
+ }
+
+ if (pre_dec != NULL_RTX)
+ {
+ if (! sp_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0, stack_pointer_rtx));
+ sp_in_r0 = 1;
+ }
+
+ offset -= GET_MODE_SIZE (mode);
+ offset_in_r0 -= GET_MODE_SIZE (mode);
+
+ mem_rtx = pre_dec;
+ }
+ else if (sp_in_r0)
+ mem_rtx = gen_frame_mem (mode, r0);
+ else
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ r0));
+
+ /* We must not use an r0-based address for target-branch
+ registers or for special registers without pre-dec
+ memory addresses, since we store their values in r0
+ first. */
+ gcc_assert (!TARGET_REGISTER_P (reg)
+ && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
+ || mem_rtx == pre_dec));
+
+ addr_ok:
+ orig_reg_rtx = reg_rtx;
+ if (TARGET_REGISTER_P (reg)
+ || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
+ && mem_rtx != pre_dec))
+ {
+ rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
+
+ emit_move_insn (tmp_reg, reg_rtx);
+
+ if (REGNO (tmp_reg) == R0_REG)
+ {
+ offset_in_r0 = -1;
+ sp_in_r0 = 0;
+ gcc_assert (!refers_to_regno_p
+ (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
+ }
+
+ if (*++tmp_pnt <= 0)
+ tmp_pnt = schedule.temps;
+
+ reg_rtx = tmp_reg;
+ }
+ {
+ rtx insn;
+
+ /* Mark as interesting for the DWARF CFI generator. */
+ insn = emit_move_insn (mem_rtx, reg_rtx);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ /* If we use an intermediate register for the save, we can't
+ describe this exactly in cfi as a copy of the to-be-saved
+ register into the temporary register and then the temporary
+ register on the stack, because the temporary register can
+ have a different natural size than the to-be-saved register.
+ Thus, we gloss over the intermediate copy and pretend we do
+ a direct save from the to-be-saved register. */
+ if (REGNO (reg_rtx) != reg)
+ {
+ rtx set;
+
+ set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ }
+
+ if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
+ {
+ rtx reg_rtx = gen_rtx_REG (mode, reg);
+ rtx set;
+ rtx mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+
+ set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ }
+ }
+ }
+
+ gcc_assert (entry->offset == d_rounding);
+ }
+ else
+ {
+ push_regs (&live_regs_mask, current_function_interrupt);
+ stack_usage += d;
+ }
+
+ if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
+ emit_insn (gen_GOTaddr2picreg ());
+
+ if (SHMEDIA_REGS_STACK_ADJUST ())
+ {
+ /* This must NOT go through the PLT, otherwise mach and macl
+ may be clobbered. */
+ function_symbol (gen_rtx_REG (Pmode, R0_REG),
+ (TARGET_FPU_ANY
+ ? "__GCC_push_shmedia_regs"
+ : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
+ emit_insn (gen_shmedia_save_restore_regs_compact
+ (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
+ }
+
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+
+ target_flags = save_flags;
+
+ output_stack_adjust (-rounded_frame_size (d) + d_rounding,
+ stack_pointer_rtx, 0, NULL, true);
+ stack_usage += rounded_frame_size (d) - d_rounding;
+
+ if (frame_pointer_needed)
+ frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
+
+ if (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
+ {
+ /* This must NOT go through the PLT, otherwise mach and macl
+ may be clobbered. */
+ function_symbol (gen_rtx_REG (Pmode, R0_REG),
+ "__GCC_shcompact_incoming_args", SFUNC_GOT);
+ emit_insn (gen_shcompact_incoming_args ());
+ }
+
+  /* If we are profiling, make sure no instructions are scheduled before
+     the call to mcount.  Similarly, if call instructions get scheduled
+     before frame related insns, they will confuse the unwinder, because
+     currently SH has no unwind info for function epilogues.  */
+ if (crtl->profile || flag_exceptions || flag_unwind_tables)
+ emit_insn (gen_blockage ());
+
+ if (flag_stack_usage_info)
+ current_function_static_stack_size = stack_usage;
+}
+
+/* Expand code for the function epilogue. */
+void
+sh_expand_epilogue (bool sibcall_p)
+{
+ HARD_REG_SET live_regs_mask;
+ int d, i;
+ int d_rounding = 0;
+
+ int save_flags = target_flags;
+ int frame_size, save_size;
+ int fpscr_deferred = 0;
+ int e = sibcall_p ? -1 : 1;
+
+ d = calc_live_regs (&live_regs_mask);
+
+ save_size = d;
+ frame_size = rounded_frame_size (d);
+
+ if (TARGET_SH5)
+ {
+ int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
+ int total_size;
+ if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
+ d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
+ - d % (STACK_BOUNDARY / BITS_PER_UNIT));
+
+ total_size = d + tregs_space;
+ total_size += rounded_frame_size (total_size);
+ save_size = total_size - frame_size;
+
+ /* If adjusting the stack in a single step costs nothing extra, do so.
+ I.e. either if a single addi is enough, or we need a movi anyway,
+ and we don't exceed the maximum offset range (the test for the
+ latter is conservative for simplicity). */
+ if (TARGET_SHMEDIA
+ && ! frame_pointer_needed
+ && (CONST_OK_FOR_I10 (total_size)
+ || (! CONST_OK_FOR_I10 (save_size + d_rounding)
+ && total_size <= 2044)))
+ d_rounding = frame_size;
+
+ frame_size -= d_rounding;
+ }
+
+ if (frame_pointer_needed)
+ {
+ /* We must avoid scheduling the epilogue with previous basic blocks.
+ See PR/18032 and PR/40313. */
+ emit_insn (gen_blockage ());
+ output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
+ &live_regs_mask, true);
+
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
+ }
+ else if (frame_size)
+ {
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ output_stack_adjust (frame_size, stack_pointer_rtx, e,
+ &live_regs_mask, true);
+ }
+
+ if (SHMEDIA_REGS_STACK_ADJUST ())
+ {
+ function_symbol (gen_rtx_REG (Pmode, R0_REG),
+ (TARGET_FPU_ANY
+ ? "__GCC_pop_shmedia_regs"
+ : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
+ /* This must NOT go through the PLT, otherwise mach and macl
+ may be clobbered. */
+ emit_insn (gen_shmedia_save_restore_regs_compact
+ (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
+ }
+
+ /* Pop all the registers. */
+
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+ if (TARGET_SH5)
+ {
+ int offset_base, offset;
+ int offset_in_r0 = -1;
+ int sp_in_r0 = 0;
+ rtx r0 = gen_rtx_REG (Pmode, R0_REG);
+ save_schedule schedule;
+ save_entry *entry;
+ int *tmp_pnt;
+
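+      /* Recreate the register save schedule used by the prologue and walk
+         it backwards, reloading each saved register from the stack.  */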
+ entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
+ offset_base = -entry[1].offset + d_rounding;
+ tmp_pnt = schedule.temps;
+ for (; entry->mode != VOIDmode; entry--)
+ {
+ enum machine_mode mode = (enum machine_mode) entry->mode;
+ int reg = entry->reg;
+ rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
+
+ offset = offset_base + entry->offset;
+ reg_rtx = gen_rtx_REG (mode, reg);
+
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (offset)));
+
+ if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
+ mem_rtx = NULL_RTX;
+
+ if (HAVE_POST_INCREMENT
+ && (offset == offset_in_r0
+ || (offset + GET_MODE_SIZE (mode) != d + d_rounding
+ && mem_rtx == NULL_RTX)
+ || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
+ {
+ post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
+
+ if (!memory_address_p (mode, XEXP (post_inc, 0)))
+ post_inc = NULL_RTX;
+ else
+ mem_rtx = NULL_RTX;
+ }
+
+ if (mem_rtx != NULL_RTX)
+ goto addr_ok;
+
+ if (offset_in_r0 == -1)
+ {
+ emit_move_insn (r0, GEN_INT (offset));
+ offset_in_r0 = offset;
+ }
+ else if (offset != offset_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0,
+ GEN_INT (offset - offset_in_r0)));
+ offset_in_r0 += offset - offset_in_r0;
+ }
+
+ if (post_inc != NULL_RTX)
+ {
+ if (! sp_in_r0)
+ {
+ emit_move_insn (r0,
+ gen_rtx_PLUS
+ (Pmode, r0, stack_pointer_rtx));
+ sp_in_r0 = 1;
+ }
+
+ mem_rtx = post_inc;
+
+ offset_in_r0 += GET_MODE_SIZE (mode);
+ }
+ else if (sp_in_r0)
+ mem_rtx = gen_frame_mem (mode, r0);
+ else
+ mem_rtx = gen_frame_mem (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ r0));
+
+ gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
+ || mem_rtx == post_inc);
+
+ addr_ok:
+ if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
+ && mem_rtx != post_inc)
+ {
+ emit_move_insn (r0, mem_rtx);
+ mem_rtx = r0;
+ }
+ else if (TARGET_REGISTER_P (reg))
+ {
+ rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
+
+ /* Give the scheduler a bit of freedom by using up to
+ MAX_TEMPS registers in a round-robin fashion. */
+ emit_move_insn (tmp_reg, mem_rtx);
+ mem_rtx = tmp_reg;
+ if (*++tmp_pnt < 0)
+ tmp_pnt = schedule.temps;
+ }
+
+ emit_move_insn (reg_rtx, mem_rtx);
+ }
+
+ gcc_assert (entry->offset + offset_base == d + d_rounding);
+ }
+ else /* ! TARGET_SH5 */
+ {
+ int last_reg;
+
+ save_size = 0;
+ /* For an ISR with RESBANK attribute assigned, don't pop PR
+ register. */
+ if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
+ && !sh_cfun_resbank_handler_p ())
+ {
+ if (!frame_pointer_needed)
+ emit_insn (gen_blockage ());
+ pop (PR_REG);
+ }
+
+ /* Banked registers are popped first to avoid being scheduled in the
+ delay slot. RTE switches banks before the ds instruction. */
+ if (current_function_interrupt)
+ {
+ bool use_movml = false;
+
+ if (TARGET_SH2A)
+ {
+ unsigned int count = 0;
+
+ for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
+ if (TEST_HARD_REG_BIT (live_regs_mask, i))
+ count++;
+ else
+ break;
+
+	      /* Use movml when all banked registers are popped.  */
+ if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
+ use_movml = true;
+ }
+
+ if (sh_cfun_resbank_handler_p ())
+ ; /* Do nothing. */
+ else if (use_movml)
+ {
+ rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
+
+	      /* We must avoid scheduling the multiple-register load insn
+		 together with other insns.  */
+ emit_insn (gen_blockage ());
+ emit_insn (gen_movml_pop_banked (sp_reg));
+ emit_insn (gen_blockage ());
+ }
+ else
+ for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
+ if (TEST_HARD_REG_BIT (live_regs_mask, i))
+ pop (i);
+
+ last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
+ }
+ else
+ last_reg = FIRST_PSEUDO_REGISTER;
+
+ for (i = 0; i < last_reg; i++)
+ {
+ int j = (FIRST_PSEUDO_REGISTER - 1) - i;
+
+ if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
+ && hard_reg_set_intersect_p (live_regs_mask,
+ reg_class_contents[DF_REGS]))
+ fpscr_deferred = 1;
+ /* For an ISR with RESBANK attribute assigned, don't pop
+ following registers, R0-R14, MACH, MACL and GBR. */
+ else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
+ && ! (sh_cfun_resbank_handler_p ()
+ && ((j >= FIRST_GENERAL_REG
+ && j < LAST_GENERAL_REG)
+ || j == MACH_REG
+ || j == MACL_REG
+ || j == GBR_REG)))
+ pop (j);
+
+ if (j == FIRST_FP_REG && fpscr_deferred)
+ pop (FPSCR_REG);
+ }
+ }
+ if (target_flags != save_flags && ! current_function_interrupt)
+ emit_insn (gen_toggle_sz ());
+ target_flags = save_flags;
+
+ output_stack_adjust (crtl->args.pretend_args_size
+ + save_size + d_rounding
+ + crtl->args.info.stack_regs * 8,
+ stack_pointer_rtx, e, NULL, true);
+
+ if (crtl->calls_eh_return)
+ emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
+ EH_RETURN_STACKADJ_RTX));
+
+ /* Switch back to the normal stack if necessary. */
+ if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
+ emit_insn (gen_sp_switch_2 ());
+
+ /* Tell flow the insn that pops PR isn't dead. */
+ /* PR_REG will never be live in SHmedia mode, and we don't need to
+ USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
+ by the return pattern. */
+ if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
+ emit_use (gen_rtx_REG (SImode, PR_REG));
+}
+
+/* Emit code to change the current function's return address to RA.
+ TEMP is available as a scratch register, if needed. */
+void
+sh_set_return_address (rtx ra, rtx tmp)
+{
+ HARD_REG_SET live_regs_mask;
+ int d;
+ int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
+ int pr_offset;
+
+ d = calc_live_regs (&live_regs_mask);
+
+  /* If pr_reg isn't live, we can set it (or the register given in
+     sh_media_register_for_return) directly.  */
+ if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
+ {
+ rtx rr;
+
+ if (TARGET_SHMEDIA)
+ {
+ int rr_regno = sh_media_register_for_return ();
+
+ if (rr_regno < 0)
+ rr_regno = pr_reg;
+
+ rr = gen_rtx_REG (DImode, rr_regno);
+ }
+ else
+ rr = gen_rtx_REG (SImode, pr_reg);
+
+ emit_insn (GEN_MOV (rr, ra));
+ /* Tell flow the register for return isn't dead. */
+ emit_use (rr);
+ return;
+ }
+
+ if (TARGET_SH5)
+ {
+ int offset;
+ save_schedule schedule;
+ save_entry *entry;
+
+ entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
+ offset = entry[1].offset;
+ for (; entry->mode != VOIDmode; entry--)
+ if (entry->reg == pr_reg)
+ goto found;
+
+  /* We can't find the pr register.  */
+ gcc_unreachable ();
+
+ found:
+ offset = entry->offset - offset;
+ pr_offset = (rounded_frame_size (d) + offset
+ + SHMEDIA_REGS_STACK_ADJUST ());
+ }
+ else
+ pr_offset = rounded_frame_size (d);
+
+ emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
+
+ if (frame_pointer_needed)
+ emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
+ else
+ emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
+
+ tmp = gen_frame_mem (Pmode, tmp);
+ emit_insn (GEN_MOV (tmp, ra));
+  /* Tell flow this store isn't dead.  */
+ emit_use (tmp);
+}
+
+/* Clear variables at function end. */
+static void
+sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT size ATTRIBUTE_UNUSED)
+{
+}
+
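+/* Expand __builtin_saveregs: store the unnamed integer and floating point
+   argument registers into a block of memory and return the address of that
+   block, so that va_arg can later fetch anonymous arguments from it.  */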
+static rtx
+sh_builtin_saveregs (void)
+{
+ /* First unnamed integer register. */
+ int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
+ /* Number of integer registers we need to save. */
+ int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
+  /* First unnamed SFmode float reg.  */
+ int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
+ /* Number of SFmode float regs to save. */
+ int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
+ rtx regbuf, fpregs;
+ int bufsize, regno;
+ alias_set_type alias_set;
+
+ if (TARGET_SH5)
+ {
+ if (n_intregs)
+ {
+ int pushregs = n_intregs;
+
+ while (pushregs < NPARM_REGS (SImode) - 1
+ && (CALL_COOKIE_INT_REG_GET
+ (crtl->args.info.call_cookie,
+ NPARM_REGS (SImode) - pushregs)
+ == 1))
+ {
+ crtl->args.info.call_cookie
+ &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
+ - pushregs, 1);
+ pushregs++;
+ }
+
+ if (pushregs == NPARM_REGS (SImode))
+ crtl->args.info.call_cookie
+ |= (CALL_COOKIE_INT_REG (0, 1)
+ | CALL_COOKIE_STACKSEQ (pushregs - 1));
+ else
+ crtl->args.info.call_cookie
+ |= CALL_COOKIE_STACKSEQ (pushregs);
+
+ crtl->args.pretend_args_size += 8 * n_intregs;
+ }
+ if (TARGET_SHCOMPACT)
+ return const0_rtx;
+ }
+
+ if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
+ {
+ error ("__builtin_saveregs not supported by this subtarget");
+ return const0_rtx;
+ }
+
+ if (TARGET_SHMEDIA)
+ n_floatregs = 0;
+
+ /* Allocate block of memory for the regs. */
+ /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
+ Or can assign_stack_local accept a 0 SIZE argument? */
+ bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
+
+ if (TARGET_SHMEDIA)
+ regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
+ else if (n_floatregs & 1)
+ {
+ rtx addr;
+
+ regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
+ addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
+ emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
+ regbuf = change_address (regbuf, BLKmode, addr);
+ }
+ else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
+ {
+ rtx addr, mask;
+
+ regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
+ addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
+ XEXP (regbuf, 0), 4));
+ mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
+ emit_insn (gen_andsi3 (addr, addr, mask));
+ regbuf = change_address (regbuf, BLKmode, addr);
+ }
+ else
+ regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
+ alias_set = get_varargs_alias_set ();
+ set_mem_alias_set (regbuf, alias_set);
+
+ /* Save int args.
+ This is optimized to only save the regs that are necessary. Explicitly
+ named args need not be saved. */
+ if (n_intregs > 0)
+ move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
+ adjust_address (regbuf, BLKmode,
+ n_floatregs * UNITS_PER_WORD),
+ n_intregs);
+
+ if (TARGET_SHMEDIA)
+ /* Return the address of the regbuf. */
+ return XEXP (regbuf, 0);
+
+ /* Save float args.
+ This is optimized to only save the regs that are necessary. Explicitly
+ named args need not be saved.
+ We explicitly build a pointer to the buffer because it halves the insn
+ count when not optimizing (otherwise the pointer is built for each reg
+ saved).
+ We emit the moves in reverse order so that we can use predecrement. */
+
+ fpregs = copy_to_mode_reg (Pmode,
+ plus_constant (Pmode, XEXP (regbuf, 0),
+ n_floatregs * UNITS_PER_WORD));
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ rtx mem;
+ for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (-2 * UNITS_PER_WORD)));
+ mem = change_address (regbuf, DFmode, fpregs);
+ emit_move_insn (mem,
+ gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
+ }
+ regno = first_floatreg;
+ if (regno & 1)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
+ mem = change_address (regbuf, SFmode, fpregs);
+ emit_move_insn (mem,
+ gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
+ + regno - SH_REG_MSW_OFFSET));
+ }
+ }
+ else
+ for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
+ {
+ rtx mem;
+
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
+ mem = change_address (regbuf, SFmode, fpregs);
+ emit_move_insn (mem,
+ gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
+ }
+
+ /* Return the address of the regbuf. */
+ return XEXP (regbuf, 0);
+}
+
+/* Define the `__builtin_va_list' type for the ABI. */
+static tree
+sh_build_builtin_va_list (void)
+{
+ tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
+ tree record, type_decl;
+
+ if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
+ || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
+ return ptr_type_node;
+
+ record = (*lang_hooks.types.make_type) (RECORD_TYPE);
+ type_decl = build_decl (BUILTINS_LOCATION,
+ TYPE_DECL, get_identifier ("__va_list_tag"), record);
+
+ f_next_o = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_next_o"),
+ ptr_type_node);
+ f_next_o_limit = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__va_next_o_limit"),
+ ptr_type_node);
+ f_next_fp = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_next_fp"),
+ ptr_type_node);
+ f_next_fp_limit = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL,
+ get_identifier ("__va_next_fp_limit"),
+ ptr_type_node);
+ f_next_stack = build_decl (BUILTINS_LOCATION,
+ FIELD_DECL, get_identifier ("__va_next_stack"),
+ ptr_type_node);
+
+ DECL_FIELD_CONTEXT (f_next_o) = record;
+ DECL_FIELD_CONTEXT (f_next_o_limit) = record;
+ DECL_FIELD_CONTEXT (f_next_fp) = record;
+ DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
+ DECL_FIELD_CONTEXT (f_next_stack) = record;
+
+ TYPE_STUB_DECL (record) = type_decl;
+ TYPE_NAME (record) = type_decl;
+ TYPE_FIELDS (record) = f_next_o;
+ DECL_CHAIN (f_next_o) = f_next_o_limit;
+ DECL_CHAIN (f_next_o_limit) = f_next_fp;
+ DECL_CHAIN (f_next_fp) = f_next_fp_limit;
+ DECL_CHAIN (f_next_fp_limit) = f_next_stack;
+
+ layout_type (record);
+
+ return record;
+}
+
+/* Implement `va_start' for varargs and stdarg. */
+static void
+sh_va_start (tree valist, rtx nextarg)
+{
+ tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
+ tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
+ tree t, u;
+ int nfp, nint;
+
+ if (TARGET_SH5)
+ {
+ expand_builtin_saveregs ();
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ if ((! TARGET_SH2E && ! TARGET_SH4)
+ || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
+ {
+ std_expand_builtin_va_start (valist, nextarg);
+ return;
+ }
+
+ f_next_o = TYPE_FIELDS (va_list_type_node);
+ f_next_o_limit = DECL_CHAIN (f_next_o);
+ f_next_fp = DECL_CHAIN (f_next_o_limit);
+ f_next_fp_limit = DECL_CHAIN (f_next_fp);
+ f_next_stack = DECL_CHAIN (f_next_fp_limit);
+
+ next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
+ NULL_TREE);
+ next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
+ valist, f_next_o_limit, NULL_TREE);
+ next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
+ NULL_TREE);
+ next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
+ valist, f_next_fp_limit, NULL_TREE);
+ next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
+ valist, f_next_stack, NULL_TREE);
+
+ /* Call __builtin_saveregs. */
+ u = make_tree (sizetype, expand_builtin_saveregs ());
+ u = fold_convert (ptr_type_node, u);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
+ if (nfp < 8)
+ nfp = 8 - nfp;
+ else
+ nfp = 0;
+ u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ nint = crtl->args.info.arg_count[SH_ARG_INT];
+ if (nint < 4)
+ nint = 4 - nint;
+ else
+ nint = 0;
+ u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ u = make_tree (ptr_type_node, nextarg);
+ t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
+ TREE_SIDE_EFFECTS (t) = 1;
+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+}
+
+/* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
+ member, return it. */
+static tree
+find_sole_member (tree type)
+{
+ tree field, member = NULL_TREE;
+
+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+ if (!DECL_SIZE (field))
+ return NULL_TREE;
+ if (integer_zerop (DECL_SIZE (field)))
+ continue;
+ if (member)
+ return NULL_TREE;
+ member = field;
+ }
+ return member;
+}
+
+/* Implement `va_arg'. */
+static tree
+sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
+ gimple_seq *post_p ATTRIBUTE_UNUSED)
+{
+ HOST_WIDE_INT size, rsize;
+ tree tmp, pptr_type_node;
+ tree addr, lab_over = NULL, result = NULL;
+ bool pass_by_ref;
+ tree eff_type;
+
+ if (!VOID_TYPE_P (type))
+ pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
+ else
+ pass_by_ref = false;
+
+ if (pass_by_ref)
+ type = build_pointer_type (type);
+
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
+ pptr_type_node = build_pointer_type (ptr_type_node);
+
+ if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
+ && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
+ {
+ tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
+ tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
+ int pass_as_float;
+ tree lab_false;
+ tree member;
+
+ f_next_o = TYPE_FIELDS (va_list_type_node);
+ f_next_o_limit = DECL_CHAIN (f_next_o);
+ f_next_fp = DECL_CHAIN (f_next_o_limit);
+ f_next_fp_limit = DECL_CHAIN (f_next_fp);
+ f_next_stack = DECL_CHAIN (f_next_fp_limit);
+
+ next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
+ NULL_TREE);
+ next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
+ valist, f_next_o_limit, NULL_TREE);
+ next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
+ valist, f_next_fp, NULL_TREE);
+ next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
+ valist, f_next_fp_limit, NULL_TREE);
+ next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
+ valist, f_next_stack, NULL_TREE);
+
+ /* Structures with a single member with a distinct mode are passed
+ like their member. This is relevant if the latter has a REAL_TYPE
+ or COMPLEX_TYPE type. */
+ eff_type = type;
+ while (TREE_CODE (eff_type) == RECORD_TYPE
+ && (member = find_sole_member (eff_type))
+ && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
+ || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
+ || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
+ {
+ tree field_type = TREE_TYPE (member);
+
+ if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
+ eff_type = field_type;
+ else
+ {
+ gcc_assert ((TYPE_ALIGN (eff_type)
+ < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
+ || (TYPE_ALIGN (eff_type)
+ > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
+ break;
+ }
+ }
+
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
+ || (TREE_CODE (eff_type) == COMPLEX_TYPE
+ && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
+ && size <= 16));
+ }
+ else
+ {
+ pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
+ }
+
+ addr = create_tmp_var (pptr_type_node, NULL);
+ lab_false = create_artificial_label (UNKNOWN_LOCATION);
+ lab_over = create_artificial_label (UNKNOWN_LOCATION);
+
+ valist = build_simple_mem_ref (addr);
+
+ if (pass_as_float)
+ {
+ tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
+ tree cmp;
+ bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+
+ gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
+ tmp = next_fp_limit;
+ if (size > 4 && !is_double)
+ tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
+ tmp = build2 (GE_EXPR, boolean_type_node,
+ unshare_expr (next_fp_tmp), unshare_expr (tmp));
+ cmp = build3 (COND_EXPR, void_type_node, tmp,
+ build1 (GOTO_EXPR, void_type_node,
+ unshare_expr (lab_false)), NULL_TREE);
+ if (!is_double)
+ gimplify_and_add (cmp, pre_p);
+
+ if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
+ || (is_double || size == 16))
+ {
+ tmp = fold_convert (sizetype, next_fp_tmp);
+ tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
+ size_int (UNITS_PER_WORD));
+ tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
+ gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
+ }
+ if (is_double)
+ gimplify_and_add (cmp, pre_p);
+
+#ifdef FUNCTION_ARG_SCmode_WART
+ if (TYPE_MODE (eff_type) == SCmode
+ && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
+ {
+ tree subtype = TREE_TYPE (eff_type);
+ tree real, imag;
+
+ imag
+ = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
+ imag = get_initialized_tmp_var (imag, pre_p, NULL);
+
+ real
+ = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
+ real = get_initialized_tmp_var (real, pre_p, NULL);
+
+ result = build2 (COMPLEX_EXPR, eff_type, real, imag);
+ if (type != eff_type)
+ result = build1 (VIEW_CONVERT_EXPR, type, result);
+ result = get_initialized_tmp_var (result, pre_p, NULL);
+ }
+#endif /* FUNCTION_ARG_SCmode_WART */
+
+ tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+ gimplify_assign (unshare_expr (next_fp_tmp),
+ unshare_expr (valist), pre_p);
+
+ gimplify_assign (unshare_expr (valist),
+ unshare_expr (next_fp_tmp), post_p);
+ valist = next_fp_tmp;
+ }
+ else
+ {
+ tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
+ tmp = build2 (GT_EXPR, boolean_type_node, tmp,
+ unshare_expr (next_o_limit));
+ tmp = build3 (COND_EXPR, void_type_node, tmp,
+ build1 (GOTO_EXPR, void_type_node,
+ unshare_expr (lab_false)),
+ NULL_TREE);
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+
+ tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
+ gimplify_and_add (tmp, pre_p);
+
+ if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
+ gimplify_assign (unshare_expr (next_o),
+ unshare_expr (next_o_limit), pre_p);
+
+ tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
+ gimplify_assign (unshare_expr (addr), tmp, pre_p);
+ }
+
+ if (!result)
+ {
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+ }
+ }
+
+ /* ??? In va-sh.h, there had been code to make values larger than
+ size 8 indirect. This does not match the FUNCTION_ARG macros. */
+
+ tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
+ if (result)
+ {
+ gimplify_assign (result, tmp, pre_p);
+ result = build1 (NOP_EXPR, TREE_TYPE (result), result);
+ tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
+ gimplify_and_add (tmp, pre_p);
+ }
+ else
+ result = tmp;
+
+ if (pass_by_ref)
+ result = build_va_arg_indirect_ref (result);
+
+ return result;
+}
+
+/* 64-bit floating point memory transfers are paired single precision loads
+   or stores.  So the DWARF information needs fixing in little endian mode
+   (unless PR=SZ=1 in FPSCR).  */
+rtx
+sh_dwarf_register_span (rtx reg)
+{
+ unsigned regno = REGNO (reg);
+
+ if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
+ return NULL_RTX;
+
+ return
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_REG (SFmode, regno + 1),
+ gen_rtx_REG (SFmode, regno)));
+}
+
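+/* Promote the mode of function arguments and return values the same way
+   PROMOTE_MODE does, but only when prototype promotion is in effect for
+   FUNTYPE; otherwise fall back to the default handling.  */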
+static enum machine_mode
+sh_promote_function_mode (const_tree type, enum machine_mode mode,
+ int *punsignedp, const_tree funtype,
+ int for_return)
+{
+ if (sh_promote_prototypes (funtype))
+ return promote_mode (type, mode, punsignedp);
+ else
+ return default_promote_function_mode (type, mode, punsignedp, funtype,
+ for_return);
+}
+
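+/* Return true if prototype promotion applies, i.e. unless TYPE uses the
+   Hitachi / Renesas calling conventions.  */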
+static bool
+sh_promote_prototypes (const_tree type)
+{
+ if (TARGET_HITACHI)
+ return false;
+ if (! type)
+ return true;
+ return ! sh_attr_renesas_p (type);
+}
+
+/* Whether an argument must be passed by reference.  On SHcompact, we
+   pretend arguments wider than 32 bits that would have been passed in
+   registers are passed by reference, so that an SHmedia trampoline
+   loads them into the full 64-bit registers.  */
+static int
+shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ unsigned HOST_WIDE_INT size;
+
+ if (type)
+ size = int_size_in_bytes (type);
+ else
+ size = GET_MODE_SIZE (mode);
+
+ if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
+ && (!named
+ || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
+ || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
+ && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
+ && size > 4
+ && !SHCOMPACT_FORCE_ON_STACK (mode, type)
+ && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
+ return size;
+ else
+ return 0;
+}
+
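+/* Return true if an argument of mode MODE and type TYPE must be passed by
+   reference: anything that must live on the stack is, and on SHcompact the
+   wide register arguments handled by shcompact_byref above are as well.  */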
+static bool
+sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+
+ if (targetm.calls.must_pass_in_stack (mode, type))
+ return true;
+
+ /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
+ wants to know about pass-by-reference semantics for incoming
+ arguments. */
+ if (! cum)
+ return false;
+
+ if (TARGET_SHCOMPACT)
+ {
+ cum->byref = shcompact_byref (cum, mode, type, named);
+ return cum->byref != 0;
+ }
+
+ return false;
+}
+
+static bool
+sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
+ const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+ /* ??? How can it possibly be correct to return true only on the
+ caller side of the equation? Is there someplace else in the
+ sh backend that's magically producing the copies? */
+ return (get_cumulative_args (cum)->outgoing
+ && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
+ % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
+}
+
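+/* Return the number of bytes of an argument that are passed in registers
+   when the remainder of the argument is passed on the stack.  */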
+static int
+sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ int words = 0;
+
+ if (!TARGET_SH5
+ && PASS_IN_REG_P (*cum, mode, type)
+ && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && (ROUND_REG (*cum, mode)
+ + (mode != BLKmode
+ ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
+ : ROUND_ADVANCE (int_size_in_bytes (type)))
+ > NPARM_REGS (mode)))
+ words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
+
+ else if (!TARGET_SHCOMPACT
+ && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
+ words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
+
+ return words * UNITS_PER_WORD;
+}
+
+
+/* Define where to put the arguments to a function.
+ Value is zero to push the argument on the stack,
+ or a hard register in which to store the argument.
+
+ MODE is the argument's machine mode.
+ TYPE is the data type of the argument (as a tree).
+ This is null for libcalls where that information may
+ not be available.
+ CUM is a variable of type CUMULATIVE_ARGS which gives info about
+ the preceding args and about the function being called.
+ NAMED is nonzero if this argument is a named parameter
+ (otherwise it is an extra parameter matching an ellipsis).
+
+ On SH the first args are normally in registers
+ and the rest are pushed. Any arg that starts within the first
+ NPARM_REGS words is at least partially passed in a register unless
+ its data type forbids. */
+static rtx
+sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
+
+ if (! TARGET_SH5 && mode == VOIDmode)
+ return GEN_INT (ca->renesas_abi ? 1 : 0);
+
+ if (! TARGET_SH5
+ && PASS_IN_REG_P (*ca, mode, type)
+ && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
+ {
+ int regno;
+
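+      /* On little endian SH4 the two SFmode halves of an SCmode argument
+         may land in swapped argument registers; build a PARALLEL that
+         records the register actually used for each half.  */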
+ if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
+ && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
+ {
+ rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ BASE_ARG_REG (mode)
+ + (ROUND_REG (*ca, mode) ^ 1)),
+ const0_rtx);
+ rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SFmode,
+ BASE_ARG_REG (mode)
+ + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
+ GEN_INT (4));
+ return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
+ }
+
+ /* If the alignment of a DF value causes an SF register to be
+ skipped, we will use that skipped register for the next SF
+ value. */
+ if ((TARGET_HITACHI || ca->renesas_abi)
+ && ca->free_single_fp_reg
+ && mode == SFmode)
+ return gen_rtx_REG (mode, ca->free_single_fp_reg);
+
+ regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
+ ^ (mode == SFmode && TARGET_SH4
+ && TARGET_LITTLE_ENDIAN
+ && ! TARGET_HITACHI && ! ca->renesas_abi);
+ return gen_rtx_REG (mode, regno);
+
+ }
+
+ if (TARGET_SH5)
+ {
+ if (mode == VOIDmode && TARGET_SHCOMPACT)
+ return GEN_INT (ca->call_cookie);
+
+ /* The following test assumes unnamed arguments are promoted to
+ DFmode. */
+ if (mode == SFmode && ca->free_single_fp_reg)
+ return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
+
+ if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
+ && (named || ! ca->prototype_p)
+ && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
+ {
+ if (! ca->prototype_p && TARGET_SHMEDIA)
+ return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
+
+ return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
+ FIRST_FP_PARM_REG
+ + ca->arg_count[(int) SH_ARG_FLOAT]);
+ }
+
+ if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
+ && (! TARGET_SHCOMPACT
+ || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
+ && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
+ type, named))))
+ {
+ return gen_rtx_REG (mode, (FIRST_PARM_REG
+ + ca->arg_count[(int) SH_ARG_INT]));
+ }
+
+ return NULL_RTX;
+ }
+
+ return NULL_RTX;
+}
+
+/* Update the data in CUM to advance over an argument
+ of mode MODE and data type TYPE.
+ (TYPE is null for libcalls where that information may not be
+ available.) */
+static void
+sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
+ const_tree type, bool named)
+{
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
+
+ if (ca->force_mem)
+ ca->force_mem = 0;
+ else if (TARGET_SH5)
+ {
+ const_tree type2 = (ca->byref && type
+ ? TREE_TYPE (type)
+ : type);
+ enum machine_mode mode2 = (ca->byref && type
+ ? TYPE_MODE (type2)
+ : mode);
+ int dwords = ((ca->byref
+ ? ca->byref
+ : mode2 == BLKmode
+ ? int_size_in_bytes (type2)
+ : GET_MODE_SIZE (mode2)) + 7) / 8;
+ int numregs = MIN (dwords, NPARM_REGS (SImode)
+ - ca->arg_count[(int) SH_ARG_INT]);
+
+ if (numregs)
+ {
+ ca->arg_count[(int) SH_ARG_INT] += numregs;
+ if (TARGET_SHCOMPACT
+ && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
+ {
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
+ - numregs, 1);
+ /* N.B. We want this also for outgoing. */
+ ca->stack_regs += numregs;
+ }
+ else if (ca->byref)
+ {
+ if (! ca->outgoing)
+ ca->stack_regs += numregs;
+ ca->byref_regs += numregs;
+ ca->byref = 0;
+ do
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
+ - numregs, 2);
+ while (--numregs);
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
+ - 1, 1);
+ }
+ else if (dwords > numregs)
+ {
+ int pushregs = numregs;
+
+ if (TARGET_SHCOMPACT)
+ ca->stack_regs += numregs;
+ while (pushregs < NPARM_REGS (SImode) - 1
+ && (CALL_COOKIE_INT_REG_GET
+ (ca->call_cookie,
+ NPARM_REGS (SImode) - pushregs)
+ == 1))
+ {
+ ca->call_cookie
+ &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
+ - pushregs, 1);
+ pushregs++;
+ }
+ if (numregs == NPARM_REGS (SImode))
+ ca->call_cookie
+ |= CALL_COOKIE_INT_REG (0, 1)
+ | CALL_COOKIE_STACKSEQ (numregs - 1);
+ else
+ ca->call_cookie
+ |= CALL_COOKIE_STACKSEQ (numregs);
+ }
+ }
+ if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
+ && (named || ! ca->prototype_p))
+ {
+ if (mode2 == SFmode && ca->free_single_fp_reg)
+ ca->free_single_fp_reg = 0;
+ else if (ca->arg_count[(int) SH_ARG_FLOAT]
+ < NPARM_REGS (SFmode))
+ {
+ int numfpregs
+ = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
+ NPARM_REGS (SFmode)
+ - ca->arg_count[(int) SH_ARG_FLOAT]);
+
+ ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
+
+ if (TARGET_SHCOMPACT && ! ca->prototype_p)
+ {
+ if (ca->outgoing && numregs > 0)
+ do
+ {
+ ca->call_cookie
+ |= (CALL_COOKIE_INT_REG
+ (ca->arg_count[(int) SH_ARG_INT]
+ - numregs + ((numfpregs - 2) / 2),
+ 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
+ - numfpregs) / 2));
+ }
+ while (numfpregs -= 2);
+ }
+ else if (mode2 == SFmode && (named)
+ && (ca->arg_count[(int) SH_ARG_FLOAT]
+ < NPARM_REGS (SFmode)))
+ ca->free_single_fp_reg
+ = FIRST_FP_PARM_REG - numfpregs
+ + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
+ }
+ }
+ return;
+ }
+
+ if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
+ {
+ /* Note that we've used the skipped register. */
+ if (mode == SFmode && ca->free_single_fp_reg)
+ {
+ ca->free_single_fp_reg = 0;
+ return;
+ }
+      /* When we have a DF after an SF, there's an SF register that gets
+ skipped in order to align the DF value. We note this skipped
+ register, because the next SF value will use it, and not the
+ SF that follows the DF. */
+ if (mode == DFmode
+ && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
+ {
+ ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
+ + BASE_ARG_REG (mode));
+ }
+ }
+
+ if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
+ || PASS_IN_REG_P (*ca, mode, type))
+ (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
+ = (ROUND_REG (*ca, mode)
+ + (mode == BLKmode
+ ? ROUND_ADVANCE (int_size_in_bytes (type))
+ : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
+}
+
+/* The Renesas calling convention doesn't quite fit into this scheme since
+ the address is passed like an invisible argument, but one that is always
+ passed in memory. */
+static rtx
+sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
+{
+ if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
+ return NULL_RTX;
+ return gen_rtx_REG (Pmode, 2);
+}
+
+/* Worker function for TARGET_FUNCTION_VALUE.
+
+ For the SH, this is like LIBCALL_VALUE, except that we must change the
+ mode like PROMOTE_MODE does.
+ ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
+ tested here has to be kept in sync with the one in
+ explow.c:promote_mode. */
+static rtx
+sh_function_value (const_tree valtype,
+ const_tree fn_decl_or_type,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ if (fn_decl_or_type
+ && !DECL_P (fn_decl_or_type))
+ fn_decl_or_type = NULL;
+
+ return gen_rtx_REG (
+ ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
+ && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
+ && (TREE_CODE (valtype) == INTEGER_TYPE
+ || TREE_CODE (valtype) == ENUMERAL_TYPE
+ || TREE_CODE (valtype) == BOOLEAN_TYPE
+ || TREE_CODE (valtype) == REAL_TYPE
+ || TREE_CODE (valtype) == OFFSET_TYPE))
+ && sh_promote_prototypes (fn_decl_or_type)
+ ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
+ BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
+}
+
+/* Worker function for TARGET_LIBCALL_VALUE. */
+static rtx
+sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
+}
+
+/* Return true if N is a possible register number of function value. */
+static bool
+sh_function_value_regno_p (const unsigned int regno)
+{
+ return ((regno) == FIRST_RET_REG
+ || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
+ || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
+}
+
+/* Worker function for TARGET_RETURN_IN_MEMORY. */
+static bool
+sh_return_in_memory (const_tree type, const_tree fndecl)
+{
+ if (TARGET_SH5)
+ {
+ if (TYPE_MODE (type) == BLKmode)
+ return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
+ else
+ return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
+ }
+ else
+ {
+ return (TYPE_MODE (type) == BLKmode
+ || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
+ && TREE_CODE (type) == RECORD_TYPE));
+ }
+}
+
+/* We actually emit the code in sh_expand_prologue. We used to use
+ a static variable to flag that we need to emit this code, but that
+   doesn't work when inlining, when functions are deferred and then emitted
+ later. Fortunately, we already have two flags that are part of struct
+ function that tell if a function uses varargs or stdarg. */
+static void
+sh_setup_incoming_varargs (cumulative_args_t ca,
+ enum machine_mode mode,
+ tree type,
+ int *pretend_arg_size,
+ int second_time ATTRIBUTE_UNUSED)
+{
+ gcc_assert (cfun->stdarg);
+ if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
+ {
+ int named_parm_regs, anon_parm_regs;
+
+ named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
+ + (mode == BLKmode
+ ? ROUND_ADVANCE (int_size_in_bytes (type))
+ : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
+ anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
+ if (anon_parm_regs > 0)
+ *pretend_arg_size = anon_parm_regs * 4;
+ }
+}
+
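+/* On SH5 all arguments are treated as named for argument passing
+   purposes.  */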
+static bool
+sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
+{
+ return TARGET_SH5;
+}
+
+static bool
+sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
+{
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
+
+ return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
+}
+
+
+/* Define the offset between two registers, one to be eliminated, and
+ the other its replacement, at the start of a routine. */
+int
+initial_elimination_offset (int from, int to)
+{
+ int regs_saved;
+ int regs_saved_rounding = 0;
+ int total_saved_regs_space;
+ int total_auto_space;
+ int save_flags = target_flags;
+ int copy_flags;
+ HARD_REG_SET live_regs_mask;
+
+ shmedia_space_reserved_for_target_registers = false;
+ regs_saved = calc_live_regs (&live_regs_mask);
+ regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
+
+ if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
+ {
+ shmedia_space_reserved_for_target_registers = true;
+ regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
+ }
+
+ if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
+ regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
+ - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
+
+ total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
+ copy_flags = target_flags;
+ target_flags = save_flags;
+
+ total_saved_regs_space = regs_saved + regs_saved_rounding;
+
+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return total_saved_regs_space + total_auto_space
+ + crtl->args.info.byref_regs * 8;
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return total_saved_regs_space + total_auto_space
+ + crtl->args.info.byref_regs * 8;
+
+ /* Initial gap between fp and sp is 0. */
+ if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return 0;
+
+ if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ return rounded_frame_size (0);
+
+ if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
+ return rounded_frame_size (0);
+
+ gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
+ && (to == HARD_FRAME_POINTER_REGNUM
+ || to == STACK_POINTER_REGNUM));
+ if (TARGET_SH5)
+ {
+ int n = total_saved_regs_space;
+ int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
+ save_schedule schedule;
+ save_entry *entry;
+
+ n += total_auto_space;
+
+ /* If it wasn't saved, there's not much we can do. */
+ if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
+ return n;
+
+ target_flags = copy_flags;
+
+ sh5_schedule_saves (&live_regs_mask, &schedule, n);
+ for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
+ if (entry->reg == pr_reg)
+ {
+ target_flags = save_flags;
+ return entry->offset;
+ }
+ gcc_unreachable ();
+ }
+ else
+ return total_auto_space;
+}
+
+/* Parse the -mfixed-range= option string. */
+void
+sh_fix_range (const char *const_str)
+{
+ int i, first, last;
+ char *str, *dash, *comma;
+
+  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
+ REG2 are either register names or register numbers. The effect
+ of this option is to mark the registers in the range from REG1 to
+ REG2 as ``fixed'' so they won't be used by the compiler. */
+
+ i = strlen (const_str);
+ str = (char *) alloca (i + 1);
+ memcpy (str, const_str, i + 1);
+
+ while (1)
+ {
+ dash = strchr (str, '-');
+ if (!dash)
+ {
+ warning (0, "value of -mfixed-range must have form REG1-REG2");
+ return;
+ }
+ *dash = '\0';
+ comma = strchr (dash + 1, ',');
+ if (comma)
+ *comma = '\0';
+
+ first = decode_reg_name (str);
+ if (first < 0)
+ {
+ warning (0, "unknown register name: %s", str);
+ return;
+ }
+
+ last = decode_reg_name (dash + 1);
+ if (last < 0)
+ {
+ warning (0, "unknown register name: %s", dash + 1);
+ return;
+ }
+
+ *dash = '-';
+
+ if (first > last)
+ {
+ warning (0, "%s-%s is an empty range", str, dash + 1);
+ return;
+ }
+
+ for (i = first; i <= last; ++i)
+ fixed_regs[i] = call_used_regs[i] = 1;
+
+ if (!comma)
+ break;
+
+ *comma = ',';
+ str = comma + 1;
+ }
+}
+
+/* Insert any deferred function attributes from earlier pragmas. */
+static void
+sh_insert_attributes (tree node, tree *attributes)
+{
+ tree attrs;
+
+ if (TREE_CODE (node) != FUNCTION_DECL)
+ return;
+
+ /* We are only interested in fields. */
+ if (!DECL_P (node))
+ return;
+
+ /* Append the attributes to the deferred attributes. */
+ *sh_deferred_function_attributes_tail = *attributes;
+ attrs = sh_deferred_function_attributes;
+ if (!attrs)
+ return;
+
+ /* Some attributes imply or require the interrupt attribute. */
+ if (!lookup_attribute ("interrupt_handler", attrs)
+ && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
+ {
+ /* If we have a trapa_handler, but no interrupt_handler attribute,
+ insert an interrupt_handler attribute. */
+ if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
+ /* We can't use sh_pr_interrupt here because that's not in the
+ java frontend. */
+ attrs
+ = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
+ /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
+ if the interrupt attribute is missing, we ignore the attribute
+ and warn. */
+ else if (lookup_attribute ("sp_switch", attrs)
+ || lookup_attribute ("trap_exit", attrs)
+ || lookup_attribute ("nosave_low_regs", attrs)
+ || lookup_attribute ("resbank", attrs))
+ {
+ tree *tail;
+
+ for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
+ {
+ if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
+ || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
+ || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
+ || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
+ warning (OPT_Wattributes,
+ "%qE attribute only applies to interrupt functions",
+ TREE_PURPOSE (attrs));
+ else
+ {
+ *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
+ NULL_TREE);
+ tail = &TREE_CHAIN (*tail);
+ }
+ }
+ attrs = *attributes;
+ }
+ }
+
+ /* Install the processed list. */
+ *attributes = attrs;
+
+ /* Clear deferred attributes. */
+ sh_deferred_function_attributes = NULL_TREE;
+ sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
+
+ return;
+}
+
+/*------------------------------------------------------------------------------
+ Target specific attributes
+ Supported attributes are:
+
+ * interrupt_handler
+ Specifies this function is an interrupt handler.
+
+ * trapa_handler
+ Like interrupt_handler, but don't save all registers.
+
+ * sp_switch
+ Specifies an alternate stack for an interrupt handler to run on.
+
+ * trap_exit
+ Use a trapa to exit an interrupt function instead of rte.
+
+ * nosave_low_regs
+ Don't save r0..r7 in an interrupt handler function.
+ This is useful on SH3* and SH4*, which have a separate set of low
+ regs for user and privileged modes.
+ This is mainly to be used for non-reentrant interrupt handlers (i.e.
+ those that run with interrupts disabled and thus can't be
+   interrupted themselves).
+
+ * renesas
+ Use Renesas calling/layout conventions (functions and structures).
+
+ * resbank
+ In case of an interrupt handler function, use a register bank to
+ save registers R0-R14, MACH, MACL, GBR and PR.
+ This is available only on SH2A targets.
+
+ * function_vector
+ Declares a function to be called using the TBR relative addressing
+ mode. Takes an argument that specifies the slot number in the table
+ where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
+*/
+
+/* Handle a 'resbank' attribute. */
+static tree
+sh_handle_resbank_handler_attribute (tree * node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ if (!TARGET_SH2A)
+ {
+ warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
+ name);
+ *no_add_attrs = true;
+ }
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle an "interrupt_handler" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh_handle_interrupt_handler_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TARGET_SHCOMPACT)
+ {
+ error ("attribute interrupt_handler is not compatible with -m5-compact");
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a 'function_vector' attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool * no_add_attrs)
+{
+ if (!TARGET_SH2A)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ {
+ /* The argument must be a constant integer. */
+ warning (OPT_Wattributes,
+ "%qE attribute argument not an integer constant",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
+ {
+      /* The argument value must be between 0 and 255.  */
+ warning (OPT_Wattributes,
+ "%qE attribute argument should be between 0 to 255",
+ name);
+ *no_add_attrs = true;
+ }
+ return NULL_TREE;
+}
+
+/* Returns true if current function has been assigned the attribute
+ 'function_vector'. */
+bool
+sh2a_is_function_vector_call (rtx x)
+{
+ if (GET_CODE (x) == SYMBOL_REF
+ && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
+ {
+ tree tr = SYMBOL_REF_DECL (x);
+
+ if (sh2a_function_vector_p (tr))
+ return true;
+ }
+
+ return false;
+}
+
+/* Returns the function vector number, if the attribute
+ 'function_vector' is assigned, otherwise returns zero. */
+int
+sh2a_get_function_vector_number (rtx x)
+{
+ int num;
+ tree list, t;
+
+ if ((GET_CODE (x) == SYMBOL_REF)
+ && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
+ {
+ t = SYMBOL_REF_DECL (x);
+
+ if (TREE_CODE (t) != FUNCTION_DECL)
+ return 0;
+
+ list = SH_ATTRIBUTES (t);
+ while (list)
+ {
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ {
+ num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
+ return num;
+ }
+
+ list = TREE_CHAIN (list);
+ }
+
+ return 0;
+ }
+ else
+ return 0;
+}
+
+/* Handle an "sp_switch" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+ {
+ /* The argument must be a constant string. */
+ warning (OPT_Wattributes, "%qE attribute argument not a string constant",
+ name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+/* Handle a "trap_exit" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
+ *no_add_attrs = true;
+ }
+ /* The argument specifies a trap number to be used in a trapa instruction
+ at function exit (instead of an rte instruction). */
+ else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ {
+ /* The argument must be a constant integer. */
+ warning (OPT_Wattributes, "%qE attribute argument not an "
+ "integer constant", name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
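+/* Handle a "renesas" attribute; no checking is needed here, merely
+   recording the attribute on the type is enough (see sh_attr_renesas_p
+   below).  */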
+static tree
+sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
+ tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs ATTRIBUTE_UNUSED)
+{
+ return NULL_TREE;
+}
+
+/* True if __attribute__((renesas)) or -mrenesas. */
+bool
+sh_attr_renesas_p (const_tree td)
+{
+ if (TARGET_HITACHI)
+ return true;
+ if (td == NULL_TREE)
+ return false;
+ if (DECL_P (td))
+ td = TREE_TYPE (td);
+ if (td == error_mark_node)
+ return false;
+ return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
+ != NULL_TREE);
+}
+
+/* True if __attribute__((renesas)) or -mrenesas, for the current
+ function. */
+bool
+sh_cfun_attr_renesas_p (void)
+{
+ return sh_attr_renesas_p (current_function_decl);
+}
+
+/* Returns true if the current function has the "interrupt_handler"
+ attribute set. */
+bool
+sh_cfun_interrupt_handler_p (void)
+{
+ return (lookup_attribute ("interrupt_handler",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE);
+}
+
+/* Returns true if FUNC has been assigned the attribute
+ "function_vector". */
+bool
+sh2a_function_vector_p (tree func)
+{
+ tree list;
+ if (TREE_CODE (func) != FUNCTION_DECL)
+ return false;
+
+ list = SH_ATTRIBUTES (func);
+ while (list)
+ {
+ if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+ return true;
+
+ list = TREE_CHAIN (list);
+ }
+ return false;
+}
+
+/* Returns true if the current function has both the "resbank" and
+   "interrupt_handler" attributes set and we are compiling for SH2A.  */
+bool
+sh_cfun_resbank_handler_p (void)
+{
+ return ((lookup_attribute ("resbank",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE)
+ && (lookup_attribute ("interrupt_handler",
+ DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE) && TARGET_SH2A);
+}
+
+/* Returns true if the current function has a "trap_exit" attribute set. */
+bool
+sh_cfun_trap_exit_p (void)
+{
+ return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
+ != NULL_TREE;
+}
+
+/* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
+static const char *
+sh_check_pch_target_flags (int old_flags)
+{
+ if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
+ | MASK_SH_E | MASK_HARD_SH4
+ | MASK_FPU_SINGLE | MASK_SH4))
+ return _("created and used with different architectures / ABIs");
+ if ((old_flags ^ target_flags) & MASK_HITACHI)
+ return _("created and used with different ABIs");
+ if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
+ return _("created and used with different endianness");
+ return NULL;
+}
+
+/* Predicates used by the templates. */
+
+/* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
+ Used only in general_movsrc_operand. */
+bool
+system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ switch (REGNO (op))
+ {
+ case PR_REG:
+ case MACL_REG:
+ case MACH_REG:
+ return true;
+ }
+ return false;
+}
+
+/* Returns true if OP is a floating point value with value 0.0. */
+bool
+fp_zero_operand (rtx op)
+{
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (op) != SFmode)
+ return false;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
+}
+
+/* Returns true if OP is a floating point value with value 1.0. */
+bool
+fp_one_operand (rtx op)
+{
+ REAL_VALUE_TYPE r;
+
+ if (GET_MODE (op) != SFmode)
+ return false;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ return REAL_VALUES_EQUAL (r, dconst1);
+}
+
+/* In general, mode switching is used.  If we are
+   compiling without -mfmovd, movsf_ie isn't taken into account for
+   mode switching.  We could check in machine_dependent_reorg for
+   cases where we know we are in single precision mode, but there is no
+   interface to find that out during reload, so we must avoid
+   choosing an fldi alternative during reload and thus failing to
+   allocate a scratch register for the constant loading.  */
+bool
+fldi_ok (void)
+{
+ return true;
+}
+
+/* Return the TLS type for TLS symbols. */
+enum tls_model
+tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (op) != SYMBOL_REF)
+ return TLS_MODEL_NONE;
+ return SYMBOL_REF_TLS_MODEL (op);
+}
+
+/* Return the destination address of a branch. */
+static int
+branch_dest (rtx branch)
+{
+ rtx dest = SET_SRC (PATTERN (branch));
+ int dest_uid;
+
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+ return INSN_ADDRESSES (dest_uid);
+}
+
+/* Return nonzero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels. It may live past calls or jumps though. */
+bool
+reg_unused_after (rtx reg, rtx insn)
+{
+ enum rtx_code code;
+ rtx set;
+
+ /* If the reg is set by this instruction, then it is safe for our
+ case. Disregard the case where this is a store to memory, since
+ we are checking a register used in the store address. */
+ set = single_set (insn);
+ if (set && !MEM_P (SET_DEST (set))
+ && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return true;
+
+ while ((insn = NEXT_INSN (insn)))
+ {
+ rtx set;
+ if (!INSN_P (insn))
+ continue;
+
+ code = GET_CODE (insn);
+
+#if 0
+ /* If this is a label that existed before reload, then the register
+ is dead here. However, if this is a label added by reorg, then
+ the register may still be live here. We can't tell the difference,
+ so we just ignore labels completely. */
+ if (code == CODE_LABEL)
+ return 1;
+ /* else */
+#endif
+
+ if (code == JUMP_INSN)
+ return false;
+
+ /* If this is a sequence, we must handle them all at once.
+ We could have for instance a call that sets the target register,
+ and an insn in a delay slot that uses the register. In this case,
+ we must return 0. */
+ else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ int i;
+ int retval = 0;
+
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+ rtx set = single_set (this_insn);
+
+ if (CALL_P (this_insn))
+ code = CALL_INSN;
+ else if (JUMP_P (this_insn))
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ return false;
+ code = JUMP_INSN;
+ }
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return false;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ {
+ if (!MEM_P (SET_DEST (set)))
+ retval = true;
+ else
+ return false;
+ }
+ if (set == NULL_RTX
+ && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+ return false;
+ }
+ if (retval == 1)
+ return true;
+ else if (code == JUMP_INSN)
+ return false;
+ }
+
+ set = single_set (insn);
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return false;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return !MEM_P (SET_DEST (set));
+ if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return false;
+
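+      /* A call clobbers all call-used registers, so if REG is one of them
+         it cannot remain live past this call.  */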
+ if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
+ return true;
+ }
+ return true;
+}
+
+#include "ggc.h"
+
+static GTY(()) rtx t_reg_rtx;
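+/* Return the cached SImode rtx for the T bit register, creating it on
+   first use.  */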
+rtx
+get_t_reg_rtx (void)
+{
+ if (! t_reg_rtx)
+ t_reg_rtx = gen_rtx_REG (SImode, T_REG);
+ return t_reg_rtx;
+}
+
+static GTY(()) rtx fpscr_rtx;
+rtx
+get_fpscr_rtx (void)
+{
+ if (! fpscr_rtx)
+ {
+ fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
+ REG_USERVAR_P (fpscr_rtx) = 1;
+ mark_user_reg (fpscr_rtx);
+ }
+ if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
+ mark_user_reg (fpscr_rtx);
+ return fpscr_rtx;
+}
+
+static GTY(()) tree fpscr_values;
+
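+/* Load the FPSCR from entry INDEX of the external __fpscr_values array.
+   SCRATCH is used as an address register when pseudos are not available.  */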
+static void
+emit_fpu_switch (rtx scratch, int index)
+{
+ rtx dst, src;
+
+ if (fpscr_values == NULL)
+ {
+ tree t;
+
+ t = build_index_type (integer_one_node);
+ t = build_array_type (integer_type_node, t);
+ t = build_decl (BUILTINS_LOCATION,
+ VAR_DECL, get_identifier ("__fpscr_values"), t);
+ DECL_ARTIFICIAL (t) = 1;
+ DECL_IGNORED_P (t) = 1;
+ DECL_EXTERNAL (t) = 1;
+ TREE_STATIC (t) = 1;
+ TREE_PUBLIC (t) = 1;
+ TREE_USED (t) = 1;
+
+ fpscr_values = t;
+ }
+
+ src = DECL_RTL (fpscr_values);
+ if (!can_create_pseudo_p ())
+ {
+ emit_move_insn (scratch, XEXP (src, 0));
+ if (index != 0)
+ emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
+ src = adjust_automodify_address (src, PSImode, scratch, index * 4);
+ }
+ else
+ src = adjust_address (src, PSImode, index * 4);
+
+ dst = get_fpscr_rtx ();
+ emit_move_insn (dst, src);
+}
+
+void
+emit_sf_insn (rtx pat)
+{
+ emit_insn (pat);
+}
+
+void
+emit_df_insn (rtx pat)
+{
+ emit_insn (pat);
+}
+
+void
+expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+void
+expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+static rtx get_free_reg (HARD_REG_SET);
+
+/* This function returns a register to use for loading the address from
+   which the fpscr is loaded.  Currently it always returns r1 or r7, but when
+   we are able to use pseudo registers after combine, or have a better
+   mechanism for choosing a register, it should be done here.  */
+/* REGS_LIVE is the liveness information for the point for which we
+   need this allocation.  In some bare-bones exit blocks, r1 is live at the
+   start.  We can even have all of r0..r3 being live:
+__complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
+   The INSN before which new insns are placed will clobber the register
+   we return.  If a basic block consists only of setting the return value
+   register to a pseudo and using that register, the return value is not
+   live before or after this block, yet we'll insert our insns right in
+   the middle.  */
+static rtx
+get_free_reg (HARD_REG_SET regs_live)
+{
+ if (! TEST_HARD_REG_BIT (regs_live, 1))
+ return gen_rtx_REG (Pmode, 1);
+
+ /* Hard reg 1 is live; since this is a small register classes target,
+ there shouldn't be anything but a jump before the function end. */
+ gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
+ return gen_rtx_REG (Pmode, 7);
+}
+
+/* This function will set the fpscr from memory.
+ MODE is the mode we are setting it to. */
+void
+fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
+{
+ enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
+ enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
+ rtx addr_reg;
+
+ addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
+ emit_fpu_switch (addr_reg, fp_mode == norm_mode);
+}
+
+/* Is the given character a logical line separator for the assembler? */
+#ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
+#endif
+
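+/* Return true if INSN is part of a SEQUENCE (i.e. it sits in a filled
+   delay slot), using the NEXT_INSN (PREV_INSN (insn)) idiom.  */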
+static bool
+sequence_insn_p (rtx insn)
+{
+ rtx prev, next;
+
+ prev = PREV_INSN (insn);
+ if (prev == NULL)
+ return false;
+
+ next = NEXT_INSN (prev);
+ if (next == NULL)
+ return false;
+
+ return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
+}
+
+int
+sh_insn_length_adjustment (rtx insn)
+{
+ /* Instructions with unfilled delay slots take up an extra two bytes for
+ the nop in the delay slot. */
+ if (((NONJUMP_INSN_P (insn)
+ && GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ || CALL_P (insn) || JUMP_P (insn))
+ && ! sequence_insn_p (insn)
+ && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
+ return 2;
+
+ /* SH2e has a bug that prevents the use of annulled branches, so if
+ the delay slot is not filled, we'll have to put a NOP in it. */
+ if (sh_cpu_attr == CPU_SH2E
+ && JUMP_P (insn)
+ && get_attr_type (insn) == TYPE_CBRANCH
+ && ! sequence_insn_p (insn))
+ return 2;
+
+  /* sh-dsp parallel processing insns take four bytes instead of two.  */
+
+ if (NONJUMP_INSN_P (insn))
+ {
+ int sum = 0;
+ rtx body = PATTERN (insn);
+ const char *templ;
+ char c;
+ bool maybe_label = true;
+
+ if (GET_CODE (body) == ASM_INPUT)
+ templ = XSTR (body, 0);
+ else if (asm_noperands (body) >= 0)
+ templ
+ = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
+ else
+ return 0;
+ do
+ {
+ int ppi_adjust = 0;
+
+ do
+ c = *templ++;
+ while (c == ' ' || c == '\t');
+ /* all sh-dsp parallel-processing insns start with p.
+ The only non-ppi sh insn starting with p is pref.
+ The only ppi starting with pr is prnd. */
+ if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
+ ppi_adjust = 2;
+	  /* The repeat pseudo-insn expands to three insns, a total of
+	     six bytes in size.  */
+ else if ((c == 'r' || c == 'R')
+ && ! strncasecmp ("epeat", templ, 5))
+ ppi_adjust = 4;
+ while (c && c != '\n'
+ && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
+ {
+ /* If this is a label, it is obviously not a ppi insn. */
+ if (c == ':' && maybe_label)
+ {
+ ppi_adjust = 0;
+ break;
+ }
+ else if (c == '\'' || c == '"')
+ maybe_label = false;
+ c = *templ++;
+ }
+ sum += ppi_adjust;
+ maybe_label = c != ':';
+ }
+ while (c);
+ return sum;
+ }
+ return 0;
+}
+
+/* Return TRUE for a valid displacement for the REG+disp addressing
+ with MODE. */
+bool
+sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
+ bool allow_zero)
+{
+ if (! CONST_INT_P (op))
+ return false;
+
+ if (TARGET_SHMEDIA)
+ {
+ int size;
+
+ /* Check if this is the address of an unaligned load / store. */
+ if (mode == VOIDmode)
+ return satisfies_constraint_I06 (op);
+
+ size = GET_MODE_SIZE (mode);
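+      /* The displacement must be a multiple of the access size and fit
+         into a signed 10-bit field scaled by the access size.  */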
+ return (!(INTVAL (op) & (size - 1))
+ && INTVAL (op) >= -512 * size
+ && INTVAL (op) < 512 * size);
+ }
+ else
+ {
+ const HOST_WIDE_INT offset = INTVAL (op);
+ const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
+ const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
+
+ /* If the mode does not support any displacement always return false.
+ Even though an index of '0' is actually always valid, it will cause
+ troubles when e.g. a DFmode move is split into two SFmode moves,
+ where one SFmode move will have index '0' and the other move will
+ have index '4'. */
+ if (!allow_zero && max_disp < 1)
+ return false;
+
+ return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
+ }
+}
+
+/* Recognize an RTL expression that is a valid memory address for
+ an instruction.
+ The MODE argument is the machine mode for the MEM expression
+ that wants to use this address.
+ Allow REG
+ REG+disp
+ REG+r0
+ REG++
+ --REG
+ GBR
+ GBR+disp */
+static bool
+sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
+{
+ if (REG_P (x) && REGNO (x) == GBR_REG)
+ return true;
+
+ if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
+ return true;
+ else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
+ && ! TARGET_SHMEDIA
+ && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
+ return true;
+ else if (GET_CODE (x) == PLUS
+ && (mode != PSImode || reload_completed))
+ {
+ rtx xop0 = XEXP (x, 0);
+ rtx xop1 = XEXP (x, 1);
+
+ if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
+ return gbr_displacement (xop1, mode);
+
+ if (GET_MODE_SIZE (mode) <= 8
+ && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
+ && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
+ return true;
+
+ if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
+ || ((xop0 == stack_pointer_rtx
+ || xop0 == hard_frame_pointer_rtx)
+ && REG_P (xop1) && REGNO (xop1) == R0_REG)
+ || ((xop1 == stack_pointer_rtx
+ || xop1 == hard_frame_pointer_rtx)
+ && REG_P (xop0) && REGNO (xop0) == R0_REG))
+ && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
+ || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
+ || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && TARGET_FMOVD && mode == DFmode)))
+ {
+ if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
+ && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
+ return true;
+ if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
+ && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
+ isn't protected by a PIC unspec. */
+bool
+nonpic_symbol_mentioned_p (rtx x)
+{
+ const char *fmt;
+ int i;
+
+ if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
+ || GET_CODE (x) == PC)
+ return true;
+
+ /* We don't want to look into the possible MEM location of a
+ CONST_DOUBLE, since we're not going to use it, in general. */
+ if (GET_CODE (x) == CONST_DOUBLE)
+ return false;
+
+ if (GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PIC
+ || XINT (x, 1) == UNSPEC_GOT
+ || XINT (x, 1) == UNSPEC_GOTOFF
+ || XINT (x, 1) == UNSPEC_GOTPLT
+ || XINT (x, 1) == UNSPEC_GOTTPOFF
+ || XINT (x, 1) == UNSPEC_DTPOFF
+ || XINT (x, 1) == UNSPEC_TPOFF
+ || XINT (x, 1) == UNSPEC_PLT
+ || XINT (x, 1) == UNSPEC_SYMOFF
+ || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
+ return false;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ int j;
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
+ return true;
+ }
+ else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
+ return true;
+ }
+
+ return false;
+}
+
+/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
+ @GOTOFF in `reg'. */
+rtx
+legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx reg)
+{
+ if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
+ return orig;
+
+ if (GET_CODE (orig) == LABEL_REF
+ || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
+ {
+ if (reg == NULL_RTX)
+ reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTOFF2reg (reg, orig));
+ return reg;
+ }
+ else if (GET_CODE (orig) == SYMBOL_REF)
+ {
+ if (reg == NULL_RTX)
+ reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, orig));
+ return reg;
+ }
+ return orig;
+}
+
+/* Given a (logical) mode size and an offset in bytes, try to find the
+   appropriate displacement value for a mov insn.  On SH the displacements
+   are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
+   15 bytes in QImode.  To compensate for this we create a new base address by
+ adding an adjustment value to it.
+
+ If the originally requested offset is greater than 127 we prefer using
+ values 124..127 over 128..131 to increase opportunities to use the
+ add #imm, Rn insn.
+
+ In some cases it is possible that a requested offset might seem unaligned
+ or inappropriate for the mode size, like offset = 2 and mode size = 4.
+ This is compensated by adjusting the base address so that the effective
+ address of the displacement move insn will be aligned.
+
+ This is not the best possible way of rebasing the base address, as it
+ does not look at other present displacement addressings around it.
+ In some cases this can create more base address adjustments than would
+ actually be necessary. */
+struct disp_adjust
+{
+ rtx offset_adjust;
+ rtx mov_disp;
+};
+
+static struct disp_adjust
+sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
+{
+ struct disp_adjust res = { NULL_RTX, NULL_RTX };
+
+ /* Do not try to use SH2A's large displacements here, because this would
+ effectively disable the small displacement insns. */
+ const int mode_sz = GET_MODE_SIZE (mode);
+ const int mov_insn_sz = mov_insn_size (mode, false);
+ const int max_disp = sh_max_mov_insn_displacement (mode, false);
+ const int max_disp_next = max_disp + mov_insn_sz;
+ HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
+ HOST_WIDE_INT offset_adjust;
+
+ /* In some cases this actually does happen and we must check for it. */
+ if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
+ return res;
+
+ /* Keeps the previous behavior for QImode displacement addressing.
+ This just decides how the offset is re-based. Removing this special
+ case will result in slightly bigger code on average, but it's not that
+ bad actually. */
+ if (mov_insn_sz == 1)
+ align_modifier = 0;
+
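+  /* For example, in SImode (max_disp = 60) a requested offset of 100 is
+     split into offset_adjust = 64 and mov_disp = 36.  */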
+ offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
+
+ if (mode_sz + offset - offset_adjust <= max_disp_next)
+ {
+ res.offset_adjust = GEN_INT (offset_adjust);
+ res.mov_disp = GEN_INT (offset - offset_adjust);
+ }
+
+ return res;
+}
+
+/* Try to modify an illegitimate address and make it legitimate.
+ If we find one, return the new, valid address.
+ Otherwise, return the original address. */
+static rtx
+sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
+{
+ if (flag_pic)
+ x = legitimize_pic_address (oldx, mode, NULL_RTX);
+
+ if (TARGET_SHMEDIA)
+ return x;
+
+ if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
+ || (TARGET_SH2E && mode == SFmode))
+ return x;
+
+ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
+ && BASE_REGISTER_RTX_P (XEXP (x, 0)))
+ {
+ struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
+ INTVAL (XEXP (x, 1)));
+
+ if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
+ {
+ rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
+ adj.offset_adjust, NULL_RTX, 0,
+ OPTAB_LIB_WIDEN);
+ return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
+ }
+ }
+
+ return x;
+}
+
+/* Attempt to replace *p, which is an address that needs reloading, with
+ a valid memory address for an operand of mode MODE.
+ Like for sh_legitimize_address, for the SH we try to get a normal form
+ of the address. That will allow inheritance of the address reloads. */
+bool
+sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
+ int itype)
+{
+ enum reload_type type = (enum reload_type) itype;
+ const int mode_sz = GET_MODE_SIZE (mode);
+
+ if (TARGET_SHMEDIA)
+ return false;
+
+ if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
+ && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
+ && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
+ && (ALLOW_INDEXED_ADDRESS
+ || XEXP (*p, 0) == stack_pointer_rtx
+ || XEXP (*p, 0) == hard_frame_pointer_rtx))
+ {
+ const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
+ struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
+
+ if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
+ {
+ push_reload (*p, NULL_RTX, p, NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ return true;
+ }
+
+ if (TARGET_SH2E && mode == SFmode)
+ {
+ *p = copy_rtx (*p);
+ push_reload (*p, NULL_RTX, p, NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ return true;
+ }
+
+      /* FIXME: Do not allow legitimizing QImode and HImode displacement
+	 moves, because then reload has a problem figuring out the constraint
+	 that the move insn target/source reg must be R0.
+	 Or maybe some handling is wrong in sh_secondary_reload for this
+	 to work properly?  */
+ if ((mode_sz == 4 || mode_sz == 8)
+ && ! (TARGET_SH4 && mode == DFmode)
+ && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
+ {
+ rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
+ *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
+ push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ return true;
+ }
+ }
+
+ /* We must re-recognize what we created before. */
+ if (GET_CODE (*p) == PLUS
+ && (mode_sz == 4 || mode_sz == 8)
+ && GET_CODE (XEXP (*p, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
+ && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
+ && CONST_INT_P (XEXP (*p, 1))
+ && ! (TARGET_SH2E && mode == SFmode))
+ {
+ /* Because this address is so complex, we know it must have
+ been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
+ it is already unshared, and needs no further unsharing. */
+ push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
+ return true;
+ }
+
+ return false;
+}
+
+/* In the name of slightly smaller debug output, and to cater to
+ general assembler lossage, recognize various UNSPEC sequences
+ and turn them back into a direct symbol reference. */
+static rtx
+sh_delegitimize_address (rtx orig_x)
+{
+ rtx x, y;
+
+ orig_x = delegitimize_mem_from_attrs (orig_x);
+
+ x = orig_x;
+ if (MEM_P (x))
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == CONST)
+ {
+ y = XEXP (x, 0);
+ if (GET_CODE (y) == UNSPEC)
+ {
+ if (XINT (y, 1) == UNSPEC_GOT
+ || XINT (y, 1) == UNSPEC_GOTOFF
+ || XINT (y, 1) == UNSPEC_SYMOFF)
+ return XVECEXP (y, 0, 0);
+ else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
+ {
+ if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
+ {
+ rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
+
+ if (GET_CODE (symplt) == UNSPEC
+ && XINT (symplt, 1) == UNSPEC_PLT)
+ return XVECEXP (symplt, 0, 0);
+ }
+ }
+ else if (TARGET_SHMEDIA
+ && (XINT (y, 1) == UNSPEC_EXTRACT_S16
+ || XINT (y, 1) == UNSPEC_EXTRACT_U16))
+ {
+ rtx offset = XVECEXP (y, 0, 1);
+
+ x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
+ }
+ }
+ }
+
+ return orig_x;
+}
+
+/* Mark the use of a constant in the literal table. If the constant
+ has multiple labels, make it unique. */
+static rtx
+mark_constant_pool_use (rtx x)
+{
+ rtx insn, lab, pattern;
+
+ if (x == NULL_RTX)
+ return x;
+
+ switch (GET_CODE (x))
+ {
+ case LABEL_REF:
+ x = XEXP (x, 0);
+ case CODE_LABEL:
+ break;
+ default:
+ return x;
+ }
+
+ /* Get the first label in the list of labels for the same constant
+     and delete the other labels in the list.  */
+ lab = x;
+ for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
+ {
+ if (!LABEL_P (insn)
+ || LABEL_REFS (insn) != NEXT_INSN (insn))
+ break;
+ lab = insn;
+ }
+
+ for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
+ INSN_DELETED_P (insn) = 1;
+
+ /* Mark constants in a window. */
+ for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
+ {
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != UNSPEC_VOLATILE)
+ continue;
+
+ switch (XINT (pattern, 1))
+ {
+ case UNSPECV_CONST2:
+ case UNSPECV_CONST4:
+ case UNSPECV_CONST8:
+ XVECEXP (pattern, 0, 1) = const1_rtx;
+ break;
+ case UNSPECV_WINDOW_END:
+ if (XVECEXP (pattern, 0, 0) == x)
+ return lab;
+ break;
+ case UNSPECV_CONST_END:
+ return lab;
+ default:
+ break;
+ }
+ }
+
+ return lab;
+}
+
+/* Return true if it's possible to redirect BRANCH1 to the destination
+ of an unconditional jump BRANCH2. We only want to do this if the
+ resulting branch will have a short displacement. */
+bool
+sh_can_redirect_branch (rtx branch1, rtx branch2)
+{
+ if (flag_expensive_optimizations && simplejump_p (branch2))
+ {
+ rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
+ rtx insn;
+ int distance;
+
+ for (distance = 0, insn = NEXT_INSN (branch1);
+ insn && distance < 256;
+ insn = PREV_INSN (insn))
+ {
+ if (insn == dest)
+ return true;
+ else
+ distance += get_attr_length (insn);
+ }
+ for (distance = 0, insn = NEXT_INSN (branch1);
+ insn && distance < 256;
+ insn = NEXT_INSN (insn))
+ {
+ if (insn == dest)
+ return true;
+ else
+ distance += get_attr_length (insn);
+ }
+ }
+ return false;
+}
+
+/* Return nonzero if register old_reg can be renamed to register new_reg. */
+bool
+sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
+ unsigned int new_reg)
+{
+ /* Interrupt functions can only use registers that have already been
+ saved by the prologue, even if they would normally be
+ call-clobbered. */
+ if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
+ return false;
+
+ return true;
+}
+
+/* Function to update the integer COST
+ based on the relationship between INSN that is dependent on
+ DEP_INSN through the dependence LINK. The default is to make no
+ adjustment to COST. This can be used for example to specify to
+ the scheduler that an output- or anti-dependence does not incur
+ the same cost as a data-dependence. The return value should be
+ the new value for COST. */
+static int
+sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
+{
+ rtx reg, use_pat;
+
+ if (TARGET_SHMEDIA)
+ {
+ /* On SHmedia, if the dependence is an anti-dependence or
+ output-dependence, there is no cost. */
+ if (REG_NOTE_KIND (link) != 0)
+ {
+ /* However, dependencies between target register loads and
+ uses of the register in a subsequent block that are separated
+	     by a conditional branch are not modelled - we have to make do with
+ the anti-dependency between the target register load and the
+ conditional branch that ends the current block. */
+ if (REG_NOTE_KIND (link) == REG_DEP_ANTI
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
+ || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
+ && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
+ {
+ int orig_cost = cost;
+ rtx note = find_reg_note (insn, REG_BR_PROB, 0);
+ rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
+ ? insn : JUMP_LABEL (insn));
+ /* On the likely path, the branch costs 1, on the unlikely path,
+ it costs 3. */
+ cost--;
+ do
+ target = next_active_insn (target);
+ while (target && ! flow_dependent_p (target, dep_insn)
+ && --cost > 0);
+ /* If two branches are executed in immediate succession, with the
+ first branch properly predicted, this causes a stall at the
+ second branch, hence we won't need the target for the
+ second branch for two cycles after the launch of the first
+ branch. */
+ if (cost > orig_cost - 2)
+ cost = orig_cost - 2;
+ }
+ else
+ cost = 0;
+ }
+
+ else if (get_attr_is_mac_media (insn)
+ && get_attr_is_mac_media (dep_insn))
+ cost = 1;
+
+ else if (! reload_completed
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
+ && cost < 4)
+ cost = 4;
+ /* Schedule the ptabs for a casesi_jump_media in preference to stuff
+ that is needed at the target. */
+ else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
+ && ! flow_dependent_p (insn, dep_insn))
+ cost--;
+ }
+ else if (REG_NOTE_KIND (link) == 0)
+ {
+ enum attr_type type;
+ rtx dep_set;
+
+ if (recog_memoized (insn) < 0
+ || recog_memoized (dep_insn) < 0)
+ return cost;
+
+ dep_set = single_set (dep_insn);
+
+ /* The latency that we specify in the scheduling description refers
+ to the actual output, not to an auto-increment register; for that,
+ the latency is one. */
+ if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
+ {
+ rtx set = single_set (insn);
+
+ if (set
+ && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
+ && (!MEM_P (SET_DEST (set))
+ || !reg_mentioned_p (SET_DEST (dep_set),
+ XEXP (SET_DEST (set), 0))))
+ cost = 1;
+ }
+ /* The only input for a call that is timing-critical is the
+ function's address. */
+ if (CALL_P (insn))
+ {
+ rtx call = get_call_rtx_from (insn);
+ if (call
+ /* sibcalli_thunk uses a symbol_ref in an unspec. */
+ && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
+ || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
+ cost -= TARGET_SH4_300 ? 3 : 6;
+ }
+ /* Likewise, the most timing critical input for an sfuncs call
+ is the function address. However, sfuncs typically start
+ using their arguments pretty quickly.
+ Assume a four cycle delay for SH4 before they are needed.
+ Cached ST40-300 calls are quicker, so assume only a one
+ cycle delay there.
+ ??? Maybe we should encode the delays till input registers
+ are needed by sfuncs into the sfunc call insn. */
+ /* All sfunc calls are parallels with at least four components.
+ Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
+ else if (GET_CODE (PATTERN (insn)) == PARALLEL
+ && XVECLEN (PATTERN (insn), 0) >= 4
+ && (reg = sfunc_uses_reg (insn)))
+ {
+ if (! reg_set_p (reg, dep_insn))
+ cost -= TARGET_SH4_300 ? 1 : 4;
+ }
+ if (TARGET_HARD_SH4 && !TARGET_SH4_300)
+ {
+ enum attr_type dep_type = get_attr_type (dep_insn);
+
+ if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
+ cost--;
+ else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
+ && (type = get_attr_type (insn)) != TYPE_CALL
+ && type != TYPE_SFUNC)
+ cost--;
+ /* When the preceding instruction loads the shift amount of
+ the following SHAD/SHLD, the latency of the load is increased
+ by 1 cycle. */
+ if (get_attr_type (insn) == TYPE_DYN_SHIFT
+ && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
+ && reg_overlap_mentioned_p (SET_DEST (dep_set),
+ XEXP (SET_SRC (single_set (insn)),
+ 1)))
+ cost++;
+ /* When an LS group instruction with a latency of less than
+ 3 cycles is followed by a double-precision floating-point
+ instruction, FIPR, or FTRV, the latency of the first
+ instruction is increased to 3 cycles. */
+ else if (cost < 3
+ && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
+ && get_attr_dfp_comp (insn) == DFP_COMP_YES)
+ cost = 3;
+ /* The lsw register of a double-precision computation is ready one
+ cycle earlier. */
+ else if (reload_completed
+ && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
+ && (use_pat = single_set (insn))
+ && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
+ SET_SRC (use_pat)))
+ cost -= 1;
+
+ if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
+ && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
+ cost -= 1;
+ }
+ else if (TARGET_SH4_300)
+ {
+ /* Stores need their input register two cycles later. */
+ if (dep_set && cost >= 1
+ && ((type = get_attr_type (insn)) == TYPE_STORE
+ || type == TYPE_PSTORE
+ || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
+ {
+ rtx set = single_set (insn);
+
+ if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
+ && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
+ {
+ cost -= 2;
+ /* But don't reduce the cost below 1 if the address depends
+ on a side effect of dep_insn. */
+ if (cost < 1
+ && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
+ cost = 1;
+ }
+ }
+ }
+ }
+ /* An anti-dependence penalty of two applies if the first insn is a double
+ precision fadd / fsub / fmul. */
+ else if (!TARGET_SH4_300
+ && REG_NOTE_KIND (link) == REG_DEP_ANTI
+ && recog_memoized (dep_insn) >= 0
+ && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
+ || get_attr_type (dep_insn) == TYPE_DFP_MUL)
+ /* A lot of alleged anti-flow dependences are fake,
+ so check this one is real. */
+ && flow_dependent_p (dep_insn, insn))
+ cost = 2;
+
+ return cost;
+}
+
+/* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
+ if DEP_INSN is anti-flow dependent on INSN. */
+static bool
+flow_dependent_p (rtx insn, rtx dep_insn)
+{
+ rtx tmp = PATTERN (insn);
+
+ note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
+ return tmp == NULL_RTX;
+}
+
+/* A helper function for flow_dependent_p called through note_stores. */
+static void
+flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ rtx * pinsn = (rtx *) data;
+
+ if (*pinsn && reg_referenced_p (x, *pinsn))
+ *pinsn = NULL_RTX;
+}
+
+/* For use by sh_allocate_initial_value. Note that sh.md contains some
+ 'special function' patterns (type sfunc) that clobber pr, but that
+ do not look like function calls to leaf_function_p. Hence we must
+ do this extra check. */
+static int
+sh_pr_n_sets (void)
+{
+ return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
+}
+
+/* Return where to allocate pseudo for a given hard register initial
+ value. */
+static rtx
+sh_allocate_initial_value (rtx hard_reg)
+{
+ rtx x;
+
+ if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
+ {
+ if (crtl->is_leaf
+ && ! sh_pr_n_sets ()
+ && ! (TARGET_SHCOMPACT
+ && ((crtl->args.info.call_cookie
+ & ~ CALL_COOKIE_RET_TRAMP (1))
+ || crtl->saves_all_registers)))
+ x = hard_reg;
+ else
+ x = gen_frame_mem (Pmode, return_address_pointer_rtx);
+ }
+ else
+ x = NULL_RTX;
+
+ return x;
+}
+
+/* Return the issue rate: 2 (dual issue) for superscalar processors such as
+   the SH4, and 1 otherwise.  To be used by the DFA pipeline description.  */
+static int
+sh_issue_rate (void)
+{
+ if (TARGET_SUPERSCALAR)
+ return 2;
+ else
+ return 1;
+}
+
+/* Functions for ready queue reordering for sched1. */
+
+/* Get weight for mode for a set x. */
+static short
+find_set_regmode_weight (rtx x, enum machine_mode mode)
+{
+ if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
+ return 1;
+ if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
+ {
+ if (REG_P (SET_DEST (x)))
+ {
+ if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* Get regmode weight for insn. */
+static short
+find_insn_regmode_weight (rtx insn, enum machine_mode mode)
+{
+ short reg_weight = 0;
+ rtx x;
+
+ /* Increment weight for each register born here. */
+ x = PATTERN (insn);
+ reg_weight += find_set_regmode_weight (x, mode);
+ if (GET_CODE (x) == PARALLEL)
+ {
+ int j;
+ for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
+ {
+ x = XVECEXP (PATTERN (insn), 0, j);
+ reg_weight += find_set_regmode_weight (x, mode);
+ }
+ }
+ /* Decrement weight for each register that dies here. */
+ for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
+ {
+ if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
+ {
+ rtx note = XEXP (x, 0);
+ if (REG_P (note) && GET_MODE (note) == mode)
+ reg_weight--;
+ }
+ }
+ return reg_weight;
+}
+
+/* Calculate regmode weights for all insns of a basic block. */
+static void
+find_regmode_weight (basic_block b, enum machine_mode mode)
+{
+ rtx insn, next_tail, head, tail;
+
+ get_ebb_head_tail (b, b, &head, &tail);
+ next_tail = NEXT_INSN (tail);
+
+ for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+ {
+ /* Handle register life information. */
+ if (!INSN_P (insn))
+ continue;
+
+ if (mode == SFmode)
+ INSN_REGMODE_WEIGHT (insn, mode) =
+ find_insn_regmode_weight (insn, mode)
+ + 2 * find_insn_regmode_weight (insn, DFmode);
+ else if (mode == SImode)
+ INSN_REGMODE_WEIGHT (insn, mode) =
+ find_insn_regmode_weight (insn, mode)
+ + 2 * find_insn_regmode_weight (insn, DImode);
+ }
+}
+
+/* Comparison function for ready queue sorting. */
+static int
+rank_for_reorder (const void *x, const void *y)
+{
+ rtx tmp = *(const rtx *) y;
+ rtx tmp2 = *(const rtx *) x;
+
+  /* The insn in a schedule group should be issued first.  */
+ if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
+ return SCHED_GROUP_P (tmp2) ? 1 : -1;
+
+  /* If insns are equally good, sort by INSN_LUID (original insn order).  This
+ minimizes instruction movement, thus minimizing sched's effect on
+ register pressure. */
+ return INSN_LUID (tmp) - INSN_LUID (tmp2);
+}
+
+/* Resort the array A, in which only the last element (at index N-1) may
+   be out of order.  */
+static void
+swap_reorder (rtx *a, int n)
+{
+ rtx insn = a[n - 1];
+ int i = n - 2;
+
+ while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
+ {
+ a[i + 1] = a[i];
+ i -= 1;
+ }
+ a[i + 1] = insn;
+}
+
+/* Sort the ready list by ascending priority. */
+static void
+ready_reorder (rtx *ready, int nready)
+{
+ if (nready == 2)
+ swap_reorder (ready, nready);
+ else if (nready > 2)
+ qsort (ready, nready, sizeof (rtx), rank_for_reorder);
+}
+
+/* Count life regions of r0 for a block. */
+static int
+find_r0_life_regions (basic_block b)
+{
+ rtx end, insn;
+ rtx pset;
+ rtx r0_reg;
+ int live;
+ int set;
+ int death = 0;
+
+ if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
+ {
+ set = 1;
+ live = 1;
+ }
+ else
+ {
+ set = 0;
+ live = 0;
+ }
+
+ insn = BB_HEAD (b);
+ end = BB_END (b);
+ r0_reg = gen_rtx_REG (SImode, R0_REG);
+ while (1)
+ {
+ if (INSN_P (insn))
+ {
+ if (find_regno_note (insn, REG_DEAD, R0_REG))
+ {
+ death++;
+ live = 0;
+ }
+ if (!live
+ && (pset = single_set (insn))
+ && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
+ && !find_regno_note (insn, REG_UNUSED, R0_REG))
+ {
+ set++;
+ live = 1;
+ }
+ }
+ if (insn == end)
+ break;
+ insn = NEXT_INSN (insn);
+ }
+ return set - death;
+}
+
+/* Calculate regmode weights for all insns of all basic blocks.  */
+static void
+sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int old_max_uid)
+{
+ basic_block b;
+
+ regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
+ regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
+ r0_life_regions = 0;
+
+ FOR_EACH_BB_REVERSE_FN (b, cfun)
+ {
+ find_regmode_weight (b, SImode);
+ find_regmode_weight (b, SFmode);
+ if (!reload_completed)
+ r0_life_regions += find_r0_life_regions (b);
+ }
+
+ CURR_REGMODE_PRESSURE (SImode) = 0;
+ CURR_REGMODE_PRESSURE (SFmode) = 0;
+}
+
+/* Cleanup. */
+static void
+sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED)
+{
+ if (regmode_weight[0])
+ {
+ free (regmode_weight[0]);
+ regmode_weight[0] = NULL;
+ }
+ if (regmode_weight[1])
+ {
+ free (regmode_weight[1]);
+ regmode_weight[1] = NULL;
+ }
+}
+
+/* The set of scalar modes supported differs from the default in that TImode
+   is not supported for 32-bit SHMEDIA.  */
+static bool
+sh_scalar_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_SHMEDIA32 && mode == TImode)
+ return false;
+
+ return default_scalar_mode_supported_p (mode);
+}
+
+/* Cache the can_issue_more so that we can return it from reorder2. Also,
+ keep count of register pressures on SImode and SFmode. */
+static int
+sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx insn,
+ int can_issue_more)
+{
+ if (GET_CODE (PATTERN (insn)) != USE
+ && GET_CODE (PATTERN (insn)) != CLOBBER)
+ cached_can_issue_more = can_issue_more - 1;
+ else
+ cached_can_issue_more = can_issue_more;
+
+ if (reload_completed)
+ return cached_can_issue_more;
+
+ CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
+ CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
+
+ return cached_can_issue_more;
+}
+
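+/* Reset the SImode and SFmode register pressure counters at the start of
+   scheduling a block.  */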
+static void
+sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
+ int verbose ATTRIBUTE_UNUSED,
+ int veclen ATTRIBUTE_UNUSED)
+{
+ CURR_REGMODE_PRESSURE (SImode) = 0;
+ CURR_REGMODE_PRESSURE (SFmode) = 0;
+}
+
+/* Some magic numbers. */
+/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
+   functions that already have high pressure on r0.  */
+#define R0_MAX_LIFE_REGIONS 2
+/* Register Pressure thresholds for SImode and SFmode registers. */
+#define SIMODE_MAX_WEIGHT 5
+#define SFMODE_MAX_WEIGHT 10
+
+/* Return true if the pressure is high for MODE. */
+static bool
+high_pressure (enum machine_mode mode)
+{
+  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
+     functions that already have high pressure on r0.  */
+ if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
+ return true;
+
+ if (mode == SFmode)
+ return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
+ else
+ return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
+}
+
+/* Reorder ready queue if register pressure is high. */
+static int
+sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx *ready,
+ int *n_readyp,
+ int clock_var ATTRIBUTE_UNUSED)
+{
+ if (reload_completed)
+ return sh_issue_rate ();
+
+ if (high_pressure (SFmode) || high_pressure (SImode))
+ {
+ ready_reorder (ready, *n_readyp);
+ }
+
+ return sh_issue_rate ();
+}
+
+/* Skip cycles if the current register pressure is high. */
+static int
+sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx *ready ATTRIBUTE_UNUSED,
+ int *n_readyp ATTRIBUTE_UNUSED,
+ int clock_var ATTRIBUTE_UNUSED)
+{
+ if (reload_completed)
+ return cached_can_issue_more;
+
+ if (high_pressure(SFmode) || high_pressure (SImode))
+ skip_cycles = 1;
+
+ return cached_can_issue_more;
+}
+
+/* Skip cycles without sorting the ready queue.  This will move insns from
+   Q->R.  If this is the last cycle we are skipping, allow sorting of the
+   ready queue by sh_reorder.  */
+
+/* Generally, skipping this many cycles is sufficient for all insns to move
+   from Q -> R.  */
+#define MAX_SKIPS 8
+
+static int
+sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ rtx insn ATTRIBUTE_UNUSED,
+ int last_clock_var,
+ int clock_var,
+ int *sort_p)
+{
+ if (reload_completed)
+ return 0;
+
+ if (skip_cycles)
+ {
+ if ((clock_var - last_clock_var) < MAX_SKIPS)
+ {
+ *sort_p = 0;
+ return 1;
+ }
+ /* If this is the last cycle we are skipping, allow reordering of R. */
+ if ((clock_var - last_clock_var) == MAX_SKIPS)
+ {
+ *sort_p = 1;
+ return 1;
+ }
+ }
+
+ skip_cycles = 0;
+
+ return 0;
+}
+
+/* SHmedia requires registers for branches, so we can't generate new
+ branches past reload. */
+static bool
+sh_cannot_modify_jumps_p (void)
+{
+ return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
+}
+
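+/* Return the register class used for branch target registers: TARGET_REGS
+   on SHmedia, NO_REGS otherwise.  */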
+static reg_class_t
+sh_target_reg_class (void)
+{
+ return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
+}
+
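+/* Return true if it is worth using callee-saved target registers: only when
+   space has been reserved for them and enough call-saved registers are live
+   to justify the cost.  */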
+static bool
+sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
+{
+ if (! shmedia_space_reserved_for_target_registers)
+ return 0;
+ if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
+ return 0;
+
+ HARD_REG_SET dummy;
+ if (calc_live_regs (&dummy) >= 6 * 8)
+ return 1;
+ return 0;
+}
+
+static bool
+sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
+{
+ return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
+}
+
+/*
+ On the SH1..SH4, the trampoline looks like
+ 2 0002 D202 mov.l l2,r2
+ 1 0000 D301 mov.l l1,r3
+ 3 0004 422B jmp @r2
+ 4 0006 0009 nop
+ 5 0008 00000000 l1: .long area
+ 6 000c 00000000 l2: .long function
+
+ SH5 (compact) uses r1 instead of r3 for the static chain. */
+
+
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
+static void
+sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
+{
+ rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+ rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
+
+ if (TARGET_SHMEDIA64)
+ {
+ rtx tramp_templ;
+ int fixed_len;
+
+ rtx movi1 = GEN_INT (0xcc000010);
+ rtx shori1 = GEN_INT (0xc8000010);
+ rtx src, dst;
+
+ /* The following trampoline works within a +- 128 KB range for cxt:
+ ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
+ shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
+ gettr tr1,r1; blink tr0,r63 */
+ /* Address rounding makes it hard to compute the exact bounds of the
+ offset for this trampoline, but we have a rather generous offset
+ range, so frame_offset should do fine as an upper bound. */
+ if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
+ {
+ /* ??? could optimize this trampoline initialization
+ by writing DImode words with two insns each. */
+ rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
+ rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
+ insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ /* Or in ptb/u .,tr1 pattern */
+ insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
+ insn = force_operand (insn, NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
+ insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
+ insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
+ insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
+ insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
+ insn = gen_rtx_AND (DImode, insn, mask);
+ insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
+ insn = gen_lowpart (SImode, insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 20),
+ GEN_INT (0x6bf10600));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 24),
+ GEN_INT (0x4415fc10));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 28),
+ GEN_INT (0x4401fff0));
+ emit_insn (gen_ic_invalidate_line (tramp));
+ return;
+ }
+ tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
+ fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
+
+ tramp_templ = gen_datalabel_ref (tramp_templ);
+ dst = tramp_mem;
+ src = gen_const_mem (BLKmode, tramp_templ);
+ set_mem_align (dst, 256);
+ set_mem_align (src, 64);
+ emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
+
+ emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
+ emit_move_insn (adjust_address (tramp_mem, Pmode,
+ fixed_len + GET_MODE_SIZE (Pmode)),
+ cxt);
+ emit_insn (gen_ic_invalidate_line (tramp));
+ return;
+ }
+ else if (TARGET_SHMEDIA)
+ {
+ /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
+ movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
+ rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
+ rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
+ /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
+	 rotated 10 right, and the higher 16 bits of every 32 selected.  */
+ rtx movishori
+ = force_reg (V2HImode, (simplify_gen_subreg
+ (V2HImode, GEN_INT (0x4330432), SImode, 0)));
+ rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
+ rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
+
+ fnaddr = force_reg (SImode, fnaddr);
+ cxt = force_reg (SImode, cxt);
+ emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
+ gen_rtx_SUBREG (V2HImode, fnaddr, 0),
+ movishori));
+ emit_insn (gen_rotrdi3_mextr (quad0, quad0,
+ GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
+ emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
+ emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
+ emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
+ gen_rtx_SUBREG (V2HImode, cxt, 0),
+ movishori));
+ emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
+ GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
+ emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
+ emit_insn (gen_mextr4 (quad2, cxtload, blink));
+ }
+ else
+ {
+ emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
+ emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
+ }
+ emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
+ emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
+ emit_insn (gen_ic_invalidate_line (tramp));
+ return;
+ }
+ else if (TARGET_SHCOMPACT)
+ {
+ emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
+ return;
+ }
+ emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 4),
+ gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
+ SImode));
+ emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
+ emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
+ if (TARGET_HARD_SH4 || TARGET_SH5)
+ {
+ if (!TARGET_INLINE_IC_INVALIDATE
+ || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
+ emit_library_call (function_symbol (NULL, "__ic_invalidate",
+ FUNCTION_ORDINARY),
+ LCT_NORMAL, VOIDmode, 1, tramp, SImode);
+ else
+ emit_insn (gen_ic_invalidate_line (tramp));
+ }
+}
+
+/* On SH5, trampolines are SHmedia code, so add 1 to the address. */
+static rtx
+sh_trampoline_adjust_address (rtx tramp)
+{
+ if (TARGET_SHMEDIA)
+ tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
+ gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
+ return tramp;
+}
+
+/* FIXME: This is overly conservative. A SHcompact function that
+ receives arguments ``by reference'' will have them stored in its
+ own stack frame, so it must not pass pointers or references to
+ these arguments to other functions by means of sibling calls. */
+/* If PIC, we cannot make sibling calls to global functions
+ because the PLT requires r12 to be live. */
+static bool
+sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ return (1
+ && (! TARGET_SHCOMPACT
+ || crtl->args.info.stack_regs == 0)
+ && ! sh_cfun_interrupt_handler_p ()
+ && (! flag_pic
+ || (decl && ! TREE_PUBLIC (decl))
+ || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
+}
+
+/* Machine specific built-in functions. */
+
+struct builtin_description
+{
+ bool (* const is_enabled) (void);
+ const enum insn_code icode;
+ const char *const name;
+ int signature;
+ tree fndecl;
+};
+
+static bool
+shmedia_builtin_p (void)
+{
+ return TARGET_SHMEDIA;
+}
+
+/* This function can be used if there are any built-ins that are not for
+ SHmedia. It's commented out to avoid the defined-but-unused warning.
+static bool
+sh1_builtin_p (void)
+{
+ return TARGET_SH1;
+}
+*/
+
+/* Describe the number and signedness of arguments; arg[0] == result
+   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
+/* 9: 64-bit pointer, 10: 32-bit pointer */
+static const char signature_args[][4] =
+{
+#define SH_BLTIN_V2SI2 0
+ { 4, 4 },
+#define SH_BLTIN_V4HI2 1
+ { 4, 4 },
+#define SH_BLTIN_V2SI3 2
+ { 4, 4, 4 },
+#define SH_BLTIN_V4HI3 3
+ { 4, 4, 4 },
+#define SH_BLTIN_V8QI3 4
+ { 4, 4, 4 },
+#define SH_BLTIN_MAC_HISI 5
+ { 1, 4, 4, 1 },
+#define SH_BLTIN_SH_HI 6
+ { 4, 4, 1 },
+#define SH_BLTIN_SH_SI 7
+ { 4, 4, 1 },
+#define SH_BLTIN_V4HI2V2SI 8
+ { 4, 4, 4 },
+#define SH_BLTIN_V4HI2V8QI 9
+ { 4, 4, 4 },
+#define SH_BLTIN_SISF 10
+ { 4, 2 },
+#define SH_BLTIN_LDUA_L 11
+ { 2, 10 },
+#define SH_BLTIN_LDUA_Q 12
+ { 1, 10 },
+#define SH_BLTIN_STUA_L 13
+ { 0, 10, 2 },
+#define SH_BLTIN_STUA_Q 14
+ { 0, 10, 1 },
+#define SH_BLTIN_LDUA_L64 15
+ { 2, 9 },
+#define SH_BLTIN_LDUA_Q64 16
+ { 1, 9 },
+#define SH_BLTIN_STUA_L64 17
+ { 0, 9, 2 },
+#define SH_BLTIN_STUA_Q64 18
+ { 0, 9, 1 },
+#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
+#define SH_BLTIN_2 19
+#define SH_BLTIN_SU 19
+ { 1, 2 },
+#define SH_BLTIN_3 20
+#define SH_BLTIN_SUS 20
+ { 2, 2, 1 },
+#define SH_BLTIN_PSSV 21
+ { 0, 8, 2, 2 },
+#define SH_BLTIN_XXUU 22
+#define SH_BLTIN_UUUU 22
+ { 1, 1, 1, 1 },
+#define SH_BLTIN_PV 23
+ { 0, 8 },
+#define SH_BLTIN_VP 24
+ { 8, 0 },
+};
+/* mcmv: operands considered unsigned. */
+/* mmulsum_wq, msad_ubq: result considered unsigned long long. */
+/* mperm: control value considered unsigned int. */
+/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
+/* mshards_q: returns signed short. */
+/* nsb: takes long long arg, returns unsigned char. */
+static struct builtin_description bdesc[] =
+{
+ { shmedia_builtin_p,
+ CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
+ { shmedia_builtin_p,
+ CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
+};
+
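+/* Register the SHmedia builtin functions described in bdesc above,
+   creating and sharing the function type nodes for each signature
+   as needed.  */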
+static void
+sh_init_builtins (void)
+{
+ tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
+ memset (shared, 0, sizeof shared);
+
+ for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
+ {
+ builtin_description* d = &bdesc[di];
+
+ if (!d->is_enabled ())
+ continue;
+
+ tree type, arg_type = NULL_TREE;
+ int signature = d->signature;
+
+ if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
+ type = shared[signature];
+ else
+ {
+ int has_result = signature_args[signature][0] != 0;
+ tree args[3];
+
+ if ((signature_args[signature][1] & 8)
+ && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
+ || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
+ continue;
+ if (! TARGET_FPU_ANY
+ && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
+ continue;
+ for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
+ args[i] = NULL_TREE;
+ for (int i = 3; ; i--)
+ {
+ int arg = signature_args[signature][i];
+ int opno = i - 1 + has_result;
+
+ if (arg & 8)
+ arg_type = ptr_type_node;
+ else if (arg)
+ arg_type = (*lang_hooks.types.type_for_mode)
+ (insn_data[d->icode].operand[opno].mode, (arg & 1));
+ else if (i)
+ continue;
+ else
+ arg_type = void_type_node;
+ if (i == 0)
+ break;
+ args[i-1] = arg_type;
+ }
+ type = build_function_type_list (arg_type, args[0], args[1],
+ args[2], NULL_TREE);
+ if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
+ shared[signature] = type;
+ }
+ d->fndecl =
+ add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ }
+}
+
+/* Implements target hook vector_mode_supported_p. */
+bool
+sh_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (TARGET_FPU_ANY
+ && ((mode == V2SFmode)
+ || (mode == V4SFmode)
+ || (mode == V16SFmode)))
+ return true;
+
+ else if (TARGET_SHMEDIA
+ && ((mode == V8QImode)
+ || (mode == V2HImode)
+ || (mode == V4HImode)
+ || (mode == V2SImode)))
+ return true;
+
+ return false;
+}
+
+bool
+sh_frame_pointer_required (void)
+{
+/* If needed, override this in other tm.h files to cope with various OS
+ lossage requiring a frame pointer. */
+ if (SUBTARGET_FRAME_POINTER_REQUIRED)
+ return true;
+
+ if (crtl->profile)
+ return true;
+
+ return false;
+}
+
+/* Implements target hook dwarf_calling_convention.  Return the
+   appropriate enum dwarf_calling_convention value. */
+int
+sh_dwarf_calling_convention (const_tree func)
+{
+ if (sh_attr_renesas_p (func))
+ return DW_CC_GNU_renesas_sh;
+
+ return DW_CC_normal;
+}
+
+/* Returns the sh builtin decl for CODE. */
+static tree
+sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= ARRAY_SIZE (bdesc))
+ return error_mark_node;
+
+ if (!bdesc[code].is_enabled ())
+ return error_mark_node;
+
+ return bdesc[code].fndecl;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+static rtx
+sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d = &bdesc[fcode];
+ enum insn_code icode = d->icode;
+ int signature = d->signature;
+ int nop = 0;
+ rtx op[4];
+
+ if (signature_args[signature][0])
+ {
+ if (ignore)
+ return NULL_RTX;
+
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (! target || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nop++] = target;
+ }
+ else
+ target = NULL_RTX;
+
+ for (int i = 1; i <= 3; i++, nop++)
+ {
+ tree arg;
+ enum machine_mode opmode, argmode;
+ tree optype;
+
+ if (! signature_args[signature][i])
+ break;
+ arg = CALL_EXPR_ARG (exp, i - 1);
+ if (arg == error_mark_node)
+ return const0_rtx;
+ if (signature_args[signature][i] & 8)
+ {
+ opmode = ptr_mode;
+ optype = ptr_type_node;
+ }
+ else
+ {
+ opmode = insn_data[icode].operand[nop].mode;
+ optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
+ }
+ argmode = TYPE_MODE (TREE_TYPE (arg));
+ if (argmode != opmode)
+ arg = build1 (NOP_EXPR, optype, arg);
+ op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
+ if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
+ op[nop] = copy_to_mode_reg (opmode, op[nop]);
+ }
+
+ rtx pat = NULL_RTX;
+
+ switch (nop)
+ {
+ case 1:
+ pat = (*insn_data[d->icode].genfun) (op[0]);
+ break;
+ case 2:
+ pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
+ break;
+ case 3:
+ pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (! pat)
+ return NULL_RTX;
+ emit_insn (pat);
+ return target;
+}
+
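+/* Expand the unary operation CODE on the V2SF value OP1 into OP0 by
+   emitting the single-precision operation once per vector lane.  */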
+void
+sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
+{
+ rtx sel0 = const0_rtx;
+ rtx sel1 = const1_rtx;
+ rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
+ rtx op = gen_rtx_fmt_e (code, SFmode, op1);
+
+ emit_insn ((*fn) (op0, op1, op, sel0, sel0));
+ emit_insn ((*fn) (op0, op1, op, sel1, sel1));
+}
+
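+/* Likewise for the binary operation CODE on the V2SF values OP1 and OP2,
+   expanded into OP0 one SFmode lane at a time.  */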
+void
+sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
+{
+ rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
+
+ emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
+ emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
+}
+
+/* Return true if hard register REGNO can hold a value of machine-mode MODE.
+ We can allow any mode in any general register. The special registers
+ only allow SImode. Don't allow any mode in the PR.
+
+ We cannot hold DCmode values in the XD registers because alter_reg
+ handles subregs of them incorrectly. We could work around this by
+ spacing the XD registers like the DR registers, but this would require
+ additional memory in every compilation to hold larger register vectors.
+ We could hold SFmode / SCmode values in XD registers, but that
+ would require a tertiary reload when reloading from / to memory,
+ and a secondary reload to reload from / to general regs; that
+ seems to be a losing proposition.
+
+ We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
+ it won't be ferried through GP registers first. */
+bool
+sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
+{
+ if (SPECIAL_REGISTER_P (regno))
+ return mode == SImode;
+
+ if (regno == FPUL_REG)
+ return (mode == SImode || mode == SFmode);
+
+ if (FP_REGISTER_P (regno) && mode == SFmode)
+ return true;
+
+ if (mode == V2SFmode)
+ {
+ if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
+ || GENERAL_REGISTER_P (regno)))
+ return true;
+ else
+ return false;
+ }
+
+ if (mode == V4SFmode)
+ {
+ if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
+ || GENERAL_REGISTER_P (regno))
+ return true;
+ else
+ return false;
+ }
+
+ if (mode == V16SFmode)
+ {
+ if (TARGET_SHMEDIA)
+ {
+ if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
+ return true;
+ else
+ return false;
+ }
+ else
+ return regno == FIRST_XD_REG;
+ }
+
+ if (FP_REGISTER_P (regno))
+ {
+ if (mode == SFmode
+ || mode == SImode
+ || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
+ || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
+ || mode == DCmode
+ || (TARGET_SHMEDIA
+ && (mode == DFmode || mode == DImode
+ || mode == V2SFmode || mode == TImode)))
+ && ((regno - FIRST_FP_REG) & 1) == 0)
+ || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
+ && ((regno - FIRST_FP_REG) & 3) == 0))
+ return true;
+ else
+ return false;
+ }
+
+ if (XD_REGISTER_P (regno))
+ return mode == DFmode;
+
+ if (TARGET_REGISTER_P (regno))
+ return (mode == DImode || mode == SImode || mode == PDImode);
+
+ if (regno == PR_REG)
+ return mode == SImode;
+
+ if (regno == FPSCR_REG)
+ return mode == PSImode;
+
+ /* FIXME. This works around PR target/37633 for -O0. */
+ if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
+ {
+ unsigned int n = GET_MODE_SIZE (mode) / 8;
+
+ if (regno >= FIRST_GENERAL_REG + 10 - n + 1
+ && regno <= FIRST_GENERAL_REG + 14)
+ return false;
+ }
+
+ return true;
+}
+
+/* Return true if a mode change from FROM to TO is invalid for registers
+   in class RCLASS. */
+bool
+sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
+ enum reg_class rclass)
+{
+ /* We want to enable the use of SUBREGs as a means to
+ VEC_SELECT a single element of a vector. */
+
+ /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
+ This can be problematic when SFmode vector subregs need to be accessed
+ on the stack with displacement addressing, as it happens with -O0.
+ Thus we disallow the mode change for -O0. */
+ if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
+ return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
+
+ if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
+ {
+ if (TARGET_LITTLE_ENDIAN)
+ {
+ if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
+ return reg_classes_intersect_p (DF_REGS, rclass);
+ }
+ else
+ {
+ if (GET_MODE_SIZE (from) < 8)
+ return reg_classes_intersect_p (DF_REGS, rclass);
+ }
+ }
+ return false;
+}
+
+/* Return true if registers in machine mode MODE will likely be
+ allocated to registers in small register classes. */
+bool
+sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return (! TARGET_SHMEDIA);
+}
+
+/* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
+ that label is used. */
+void
+sh_mark_label (rtx address, int nuses)
+{
+ if (GOTOFF_P (address))
+ {
+ /* Extract the label or symbol. */
+ address = XEXP (address, 0);
+ if (GET_CODE (address) == PLUS)
+ address = XEXP (address, 0);
+ address = XVECEXP (address, 0, 0);
+ }
+ if (GET_CODE (address) == LABEL_REF
+ && LABEL_P (XEXP (address, 0)))
+ LABEL_NUSES (XEXP (address, 0)) += nuses;
+}
+
+/* Compute extra cost of moving data between one register class
+ and another.
+
+ If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
+ uses this information. Hence, the general register <-> floating point
+ register information here is not used for SFmode. */
+static int
+sh_register_move_cost (enum machine_mode mode,
+ reg_class_t srcclass, reg_class_t dstclass)
+{
+ if (dstclass == T_REGS || dstclass == PR_REGS)
+ return 10;
+
+ if (dstclass == MAC_REGS && srcclass == MAC_REGS)
+ return 4;
+
+ if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
+ && REGCLASS_HAS_FP_REG (srcclass)
+ && REGCLASS_HAS_FP_REG (dstclass))
+ return 4;
+
+ if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
+ return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
+
+ if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
+ || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
+ return 9;
+
+ if ((REGCLASS_HAS_FP_REG (dstclass)
+ && REGCLASS_HAS_GENERAL_REG (srcclass))
+ || (REGCLASS_HAS_GENERAL_REG (dstclass)
+ && REGCLASS_HAS_FP_REG (srcclass)))
+ {
+ /* Discourage trying to use fp regs for a pointer. This also
+ discourages fp regs with SImode because Pmode is an alias
+ of SImode on this target. See PR target/48596. */
+ int addend = (mode == Pmode) ? 40 : 0;
+
+ return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
+ * ((GET_MODE_SIZE (mode) + 7) / 8U));
+ }
+
+ if ((dstclass == FPUL_REGS
+ && REGCLASS_HAS_GENERAL_REG (srcclass))
+ || (srcclass == FPUL_REGS
+ && REGCLASS_HAS_GENERAL_REG (dstclass)))
+ return 5;
+
+ if ((dstclass == FPUL_REGS
+ && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
+ || (srcclass == FPUL_REGS
+ && (dstclass == PR_REGS || dstclass == MAC_REGS)))
+ return 7;
+
+ if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
+ || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
+ return 20;
+
+ /* ??? ptabs faults on (value & 0x3) == 0x3 */
+ if (TARGET_SHMEDIA
+ && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
+ {
+ if (sh_gettrcost >= 0)
+ return sh_gettrcost;
+ else if (!TARGET_PT_FIXED)
+ return 100;
+ }
+
+ if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
+ || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
+ return 4;
+
+ if (TARGET_SHMEDIA
+ || (TARGET_FMOVD
+ && ! REGCLASS_HAS_GENERAL_REG (srcclass)
+ && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
+ return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
+
+ return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
+}
+
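+/* Emit a load of the ptr_mode value at ADDR into REG, sign-extending it
+   to Pmode when Pmode is wider than ptr_mode.  */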
+static rtx
+emit_load_ptr (rtx reg, rtx addr)
+{
+ rtx mem = gen_const_mem (ptr_mode, addr);
+
+ if (Pmode != ptr_mode)
+ mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
+ return emit_move_insn (reg, mem);
+}
+
+static void
+sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ CUMULATIVE_ARGS cum;
+ int structure_value_byref = 0;
+ rtx this_rtx, this_value, sibcall, insns, funexp;
+ tree funtype = TREE_TYPE (function);
+ int simple_add = CONST_OK_FOR_ADD (delta);
+ int did_load = 0;
+ rtx scratch0, scratch1, scratch2;
+ unsigned i;
+
+ reload_completed = 1;
+ epilogue_completed = 1;
+ crtl->uses_only_leaf_regs = 1;
+
+ emit_note (NOTE_INSN_PROLOGUE_END);
+
+ /* Find the "this" pointer. We have such a wide range of ABIs for the
+ SH that it's best to do this completely machine independently.
+ "this" is passed as first argument, unless a structure return pointer
+ comes first, in which case "this" comes second. */
+ INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
+#ifndef PCC_STATIC_STRUCT_RETURN
+ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+ structure_value_byref = 1;
+#endif /* not PCC_STATIC_STRUCT_RETURN */
+ if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
+ {
+ tree ptype = build_pointer_type (TREE_TYPE (funtype));
+
+ sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
+ }
+ this_rtx
+ = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
+
+ /* For SHcompact, we only have r0 for a scratch register: r1 is the
+ static chain pointer (even if you can't have nested virtual functions
+ right now, someone might implement them sometime), and the rest of the
+ registers are used for argument passing, are callee-saved, or reserved. */
+ /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
+ -ffixed-reg has been used. */
+ if (! call_used_regs[0] || fixed_regs[0])
+ error ("r0 needs to be available as a call-clobbered register");
+ scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
+ if (! TARGET_SH5)
+ {
+ if (call_used_regs[1] && ! fixed_regs[1])
+ scratch1 = gen_rtx_REG (ptr_mode, 1);
+ /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
+ to the location where struct values are returned. */
+ if (call_used_regs[3] && ! fixed_regs[3])
+ scratch2 = gen_rtx_REG (Pmode, 3);
+ }
+ else if (TARGET_SHMEDIA)
+ {
+ for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
+ if (i != REGNO (scratch0) &&
+ call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
+ {
+ scratch1 = gen_rtx_REG (ptr_mode, i);
+ break;
+ }
+ if (scratch1 == scratch0)
+ error ("need a second call-clobbered general purpose register");
+ for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
+ if (call_used_regs[i] && ! fixed_regs[i])
+ {
+ scratch2 = gen_rtx_REG (Pmode, i);
+ break;
+ }
+ if (scratch2 == scratch0)
+ error ("need a call-clobbered target register");
+ }
+
+ this_value = plus_constant (Pmode, this_rtx, delta);
+ if (vcall_offset
+ && (simple_add || scratch0 != scratch1)
+ && strict_memory_address_p (ptr_mode, this_value))
+ {
+ emit_load_ptr (scratch0, this_value);
+ did_load = 1;
+ }
+
+ if (!delta)
+ ; /* Do nothing. */
+ else if (simple_add)
+ emit_move_insn (this_rtx, this_value);
+ else
+ {
+ emit_move_insn (scratch1, GEN_INT (delta));
+ emit_insn (gen_add2_insn (this_rtx, scratch1));
+ }
+
+ if (vcall_offset)
+ {
+ rtx offset_addr;
+
+ if (!did_load)
+ emit_load_ptr (scratch0, this_rtx);
+
+ offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
+ if (strict_memory_address_p (ptr_mode, offset_addr))
+ ; /* Do nothing. */
+ else if (! TARGET_SH5 && scratch0 != scratch1)
+ {
+ /* scratch0 != scratch1, and we have indexed loads. Get better
+ schedule by loading the offset into r1 and using an indexed
+ load - then the load of r1 can issue before the load from
+ (this_rtx + delta) finishes. */
+ emit_move_insn (scratch1, GEN_INT (vcall_offset));
+ offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
+ }
+ else if (CONST_OK_FOR_ADD (vcall_offset))
+ {
+ emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
+ offset_addr = scratch0;
+ }
+ else if (scratch0 != scratch1)
+ {
+ emit_move_insn (scratch1, GEN_INT (vcall_offset));
+ emit_insn (gen_add2_insn (scratch0, scratch1));
+ offset_addr = scratch0;
+ }
+ else
+ gcc_unreachable (); /* FIXME */
+ emit_load_ptr (scratch0, offset_addr);
+
+ if (Pmode != ptr_mode)
+ scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
+ emit_insn (gen_add2_insn (this_rtx, scratch0));
+ }
+
+ /* Generate a tail call to the target function. */
+ if (! TREE_USED (function))
+ {
+ assemble_external (function);
+ TREE_USED (function) = 1;
+ }
+ funexp = XEXP (DECL_RTL (function), 0);
+ /* If the function is overridden, so is the thunk, hence we don't
+ need GOT addressing even if this is a public symbol. */
+#if 0
+ if (TARGET_SH1 && ! flag_weak)
+ sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
+ else
+#endif
+ if (TARGET_SH2 && flag_pic)
+ {
+ sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
+ XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
+ }
+ else
+ {
+ if (TARGET_SHMEDIA && flag_pic)
+ {
+ funexp = gen_sym2PIC (funexp);
+ PUT_MODE (funexp, Pmode);
+ }
+ emit_move_insn (scratch2, funexp);
+ funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
+ sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
+ }
+ sibcall = emit_call_insn (sibcall);
+ SIBLING_CALL_P (sibcall) = 1;
+ use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
+ emit_barrier ();
+
+ /* Run just enough of rest_of_compilation to do scheduling and get
+ the insns emitted. Note that use_thunk calls
+ assemble_start_function and assemble_end_function. */
+
+ insns = get_insns ();
+
+ if (optimize > 0)
+ {
+ if (! cfun->cfg)
+ init_flow (cfun);
+ split_all_insns_noflow ();
+ }
+
+ sh_reorg ();
+ shorten_branches (insns);
+ final_start_function (insns, file, 1);
+ final (insns, file, 1);
+ final_end_function ();
+
+ reload_completed = 0;
+ epilogue_completed = 0;
+}
+
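+/* Return an rtx for the address of the function named NAME of kind KIND,
+   using GOT / GOTOFF loads for PIC where appropriate; if TARGET is given,
+   the address ends up in TARGET.  */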
+rtx
+function_symbol (rtx target, const char *name, enum sh_function_kind kind)
+{
+ rtx sym;
+
+ /* If this is not an ordinary function, the name usually comes from a
+ string literal or an sprintf buffer. Make sure we use the same
+ string consistently, so that cse will be able to unify address loads. */
+ if (kind != FUNCTION_ORDINARY)
+ name = IDENTIFIER_POINTER (get_identifier (name));
+ sym = gen_rtx_SYMBOL_REF (Pmode, name);
+ SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
+ if (flag_pic)
+ switch (kind)
+ {
+ case FUNCTION_ORDINARY:
+ break;
+ case SFUNC_GOT:
+ {
+ rtx reg = target ? target : gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, sym));
+ sym = reg;
+ break;
+ }
+ case SFUNC_STATIC:
+ {
+ /* ??? To allow cse to work, we use GOTOFF relocations.
+ We could add combiner patterns to transform this into
+ straight pc-relative calls with sym2PIC / bsrf when
+ label load and function call are still 1:1 and in the
+ same basic block during combine. */
+ rtx reg = target ? target : gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTOFF2reg (reg, sym));
+ sym = reg;
+ break;
+ }
+ }
+ if (target && sym != target)
+ {
+ emit_move_insn (target, sym);
+ return target;
+ }
+ return sym;
+}
+
+/* Find the number of a general purpose register in S. */
+static int
+scavenge_reg (HARD_REG_SET *s)
+{
+ int r;
+ for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
+ if (TEST_HARD_REG_BIT (*s, r))
+ return r;
+ return -1;
+}
+
+rtx
+sh_get_pr_initial_val (void)
+{
+ rtx val;
+
+ /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
+ PR register on SHcompact, because it might be clobbered by the prologue.
+ We check first if that is known to be the case. */
+ if (TARGET_SHCOMPACT
+ && ((crtl->args.info.call_cookie
+ & ~ CALL_COOKIE_RET_TRAMP (1))
+ || crtl->saves_all_registers))
+ return gen_frame_mem (SImode, return_address_pointer_rtx);
+
+ /* If we haven't finished rtl generation, there might be a nonlocal label
+ that we haven't seen yet.
+ ??? get_hard_reg_initial_val fails if it is called after register
+ allocation has started, unless it has been called before for the
+ same register. And even then, we end up in trouble if we didn't use
+ the register in the same basic block before. So call
+ get_hard_reg_initial_val now and wrap it in an unspec if we might
+ need to replace it. */
+ /* ??? We also must do this for TARGET_SH1 in general, because otherwise
+ combine can put the pseudo returned by get_hard_reg_initial_val into
+ instructions that need a general purpose register, which will fail to
+ be recognized when the pseudo becomes allocated to PR. */
+ val
+ = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
+ if (TARGET_SH1)
+ return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
+ return val;
+}
+
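+/* Try to expand the scc operation described by OPERANDS, where operand 0
+   is the destination, operand 1 the comparison and operands 2 / 3 the
+   compared values.  Only EQ / NE comparisons of the T register against a
+   constant are handled here; emit a movt / movnegt / constant load and
+   return true, otherwise return false.  */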
+bool
+sh_expand_t_scc (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx target = operands[0];
+ rtx op0 = operands[2];
+ rtx op1 = operands[3];
+ rtx result = target;
+ HOST_WIDE_INT val;
+
+ if (!REG_P (op0) || REGNO (op0) != T_REG
+ || !CONST_INT_P (op1))
+ return false;
+ if (!REG_P (result))
+ result = gen_reg_rtx (SImode);
+ val = INTVAL (op1);
+ if ((code == EQ && val == 1) || (code == NE && val == 0))
+ emit_insn (gen_movt (result, get_t_reg_rtx ()));
+ else if ((code == EQ && val == 0) || (code == NE && val == 1))
+ emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
+ else if (code == EQ || code == NE)
+ emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
+ else
+ return false;
+ if (result != target)
+ emit_move_insn (target, result);
+ return true;
+}
+
+/* INSN is an sfunc; return the rtx that describes the address used. */
+static rtx
+extract_sfunc_addr (rtx insn)
+{
+ rtx pattern, part = NULL_RTX;
+ int len, i;
+
+ pattern = PATTERN (insn);
+ len = XVECLEN (pattern, 0);
+ for (i = 0; i < len; i++)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
+ && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
+ return XEXP (part, 0);
+ }
+ gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
+ return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
+}
+
+/* Verify that the register in use_sfunc_addr still agrees with the address
+ used in the sfunc. This prevents fill_slots_from_thread from changing
+ use_sfunc_addr.
+ INSN is the use_sfunc_addr instruction, and REG is the register it
+ guards. */
+bool
+check_use_sfunc_addr (rtx insn, rtx reg)
+{
+ /* Search for the sfunc. It should really come right after INSN. */
+ while ((insn = NEXT_INSN (insn)))
+ {
+ if (LABEL_P (insn) || JUMP_P (insn))
+ break;
+ if (! INSN_P (insn))
+ continue;
+
+ if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+ insn = XVECEXP (PATTERN (insn), 0, 0);
+ if (GET_CODE (PATTERN (insn)) != PARALLEL
+ || get_attr_type (insn) != TYPE_SFUNC)
+ continue;
+ return rtx_equal_p (extract_sfunc_addr (insn), reg);
+ }
+ gcc_unreachable ();
+}
+
+/* This function returns a constant rtx that represents 2**15 / pi in
+ SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
+ of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
+static GTY(()) rtx sh_fsca_sf2int_rtx;
+
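+/* A quick check of the literal below: 32768 / 3.14159265...  comes out to
+   about 10430.3784, matching the string passed to real_from_string.  */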
+rtx
+sh_fsca_sf2int (void)
+{
+ if (! sh_fsca_sf2int_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "10430.378350470453");
+ sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
+ }
+
+ return sh_fsca_sf2int_rtx;
+}
+
+/* This function returns a constant rtx that represents pi / 2**15 in
+ SFmode. It's used to scale SFmode angles, in radians, to a
+ fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
+ maps to 0x10000. */
+static GTY(()) rtx sh_fsca_int2sf_rtx;
+
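+/* Likewise, 3.14159265...  / 32768 comes out to about 9.5874e-5, matching
+   the string passed to real_from_string below.  */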
+rtx
+sh_fsca_int2sf (void)
+{
+ if (! sh_fsca_int2sf_rtx)
+ {
+ REAL_VALUE_TYPE rv;
+
+ real_from_string (&rv, "9.587379924285257e-5");
+ sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
+ }
+
+ return sh_fsca_int2sf_rtx;
+}
+
+/* Initialize the CUMULATIVE_ARGS structure. */
+void
+sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
+ tree fntype,
+ rtx libname ATTRIBUTE_UNUSED,
+ tree fndecl,
+ signed int n_named_args,
+ enum machine_mode mode)
+{
+ pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
+ pcum->free_single_fp_reg = 0;
+ pcum->stack_regs = 0;
+ pcum->byref_regs = 0;
+ pcum->byref = 0;
+ pcum->outgoing = (n_named_args == -1) ? 0 : 1;
+
+ /* XXX - Should we check TARGET_HITACHI here ??? */
+ pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
+
+ if (fntype)
+ {
+ pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
+ && aggregate_value_p (TREE_TYPE (fntype), fndecl));
+ pcum->prototype_p = prototype_p (fntype);
+ pcum->arg_count [(int) SH_ARG_INT]
+ = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
+
+ pcum->call_cookie
+ = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
+ && pcum->arg_count [(int) SH_ARG_INT] == 0
+ && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
+ ? int_size_in_bytes (TREE_TYPE (fntype))
+ : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
+ && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
+ == FIRST_RET_REG));
+ }
+ else
+ {
+ pcum->arg_count [(int) SH_ARG_INT] = 0;
+ pcum->prototype_p = FALSE;
+ if (mode != VOIDmode)
+ {
+ pcum->call_cookie =
+ CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
+ && GET_MODE_SIZE (mode) > 4
+ && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
+
+ /* If the default ABI is the Renesas ABI then all library
+ calls must assume that the library will be using the
+ Renesas ABI. So if the function would return its result
+ in memory then we must force the address of this memory
+ block onto the stack. Ideally we would like to call
+ targetm.calls.return_in_memory() here but we do not have
+ the TYPE or the FNDECL available so we synthesize the
+ contents of that function as best we can. */
+ pcum->force_mem =
+ (TARGET_DEFAULT & MASK_HITACHI)
+ && (mode == BLKmode
+ || (GET_MODE_SIZE (mode) > 4
+ && !(mode == DFmode
+ && TARGET_FPU_DOUBLE)));
+ }
+ else
+ {
+ pcum->call_cookie = 0;
+ pcum->force_mem = FALSE;
+ }
+ }
+}
+
+/* Replace any occurrence of FROM(n) in X with TO(n). The function does
+ not enter into CONST_DOUBLE for the replace.
+
+ Note that copying is not done so X must not be shared unless all copies
+ are to be modified.
+
+ This is like replace_rtx, except that we operate on N_REPLACEMENTS
+ replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
+ replacements[n*2+1] - and that we take mode changes into account.
+
+ If a replacement is ambiguous, return NULL_RTX.
+
+ If MODIFY is zero, don't modify any rtl in place,
+ just return zero or nonzero for failure / success. */
+rtx
+replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
+{
+ int i, j;
+ const char *fmt;
+
+ /* The following prevents loops from occurring when we change a MEM in a
+ CONST_DOUBLE into the same CONST_DOUBLE. */
+ if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
+ return x;
+
+ for (i = n_replacements - 1; i >= 0 ; i--)
+ if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
+ return replacements[i*2+1];
+
+ /* Allow this function to make replacements in EXPR_LISTs. */
+ if (x == NULL_RTX)
+ return NULL_RTX;
+
+ if (GET_CODE (x) == SUBREG)
+ {
+ rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
+ n_replacements, modify);
+
+ if (CONST_INT_P (new_rtx))
+ {
+ x = simplify_subreg (GET_MODE (x), new_rtx,
+ GET_MODE (SUBREG_REG (x)),
+ SUBREG_BYTE (x));
+ if (! x)
+ abort ();
+ }
+ else if (modify)
+ SUBREG_REG (x) = new_rtx;
+
+ return x;
+ }
+ else if (REG_P (x))
+ {
+ unsigned regno = REGNO (x);
+ unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
+ ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
+ rtx result = NULL_RTX;
+
+ for (i = n_replacements - 1; i >= 0; i--)
+ {
+ rtx from = replacements[i*2];
+ rtx to = replacements[i*2+1];
+ unsigned from_regno, from_nregs, to_regno, new_regno;
+
+ if (!REG_P (from))
+ continue;
+ from_regno = REGNO (from);
+ from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
+ ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
+ if (regno < from_regno + from_nregs && regno + nregs > from_regno)
+ {
+ if (regno < from_regno
+ || regno + nregs > from_regno + from_nregs
+ || !REG_P (to)
+ || result)
+ return NULL_RTX;
+ to_regno = REGNO (to);
+ if (to_regno < FIRST_PSEUDO_REGISTER)
+ {
+ new_regno = regno + to_regno - from_regno;
+ if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
+ != nregs)
+ return NULL_RTX;
+ result = gen_rtx_REG (GET_MODE (x), new_regno);
+ }
+ else if (GET_MODE (x) <= GET_MODE (to))
+ result = gen_lowpart_common (GET_MODE (x), to);
+ else
+ result = gen_lowpart_SUBREG (GET_MODE (x), to);
+ }
+ }
+ return result ? result : x;
+ }
+ else if (GET_CODE (x) == ZERO_EXTEND)
+ {
+ rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
+ n_replacements, modify);
+
+ if (CONST_INT_P (new_rtx))
+ {
+ x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
+ new_rtx, GET_MODE (XEXP (x, 0)));
+ if (! x)
+ abort ();
+ }
+ else if (modify)
+ XEXP (x, 0) = new_rtx;
+
+ return x;
+ }
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ rtx new_rtx;
+
+ if (fmt[i] == 'e')
+ {
+ new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
+ n_replacements, modify);
+ if (!new_rtx)
+ return NULL_RTX;
+ if (modify)
+ XEXP (x, i) = new_rtx;
+ }
+ else if (fmt[i] == 'E')
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ {
+ new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
+ n_replacements, modify);
+ if (!new_rtx)
+ return NULL_RTX;
+ if (modify)
+ XVECEXP (x, i, j) = new_rtx;
+ }
+ }
+
+ return x;
+}
+
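+/* Generate an rtx that converts X to MODE, normally via a TRUNCATE.  If X
+   is itself a sign or zero extension, try to reuse its operand instead;
+   NEED_SIGN_EXT nonzero restricts that to sign extensions when the operand
+   is narrower than MODE.  */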
+rtx
+sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
+{
+ enum rtx_code code = TRUNCATE;
+
+ if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
+ {
+ rtx inner = XEXP (x, 0);
+ enum machine_mode inner_mode = GET_MODE (inner);
+
+ if (inner_mode == mode)
+ return inner;
+ else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
+ x = inner;
+ else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
+ && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
+ {
+ code = GET_CODE (x);
+ x = inner;
+ }
+ }
+ return gen_rtx_fmt_e (code, mode, x);
+}
+
+/* Called via for_each_rtx after reload, to clean up truncates of
+ registers that span multiple actual hard registers. */
+int
+shmedia_cleanup_truncate (rtx *p, void *n_changes)
+{
+ rtx x = *p, reg;
+
+ if (GET_CODE (x) != TRUNCATE)
+ return 0;
+ reg = XEXP (x, 0);
+ if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
+ {
+ enum machine_mode reg_mode = GET_MODE (reg);
+ XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
+ subreg_lowpart_offset (DImode, reg_mode));
+ *(int*) n_changes += 1;
+ return -1;
+ }
+ return 0;
+}
+
+/* Load and store depend on the highpart of the address. However,
+ set_attr_alternative does not give well-defined results before reload,
+ so we must look at the rtl ourselves to see if any of the feeding
+ registers is used in a memref.
+
+ Called by sh_contains_memref_p via for_each_rtx. */
+static int
+sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+ return (MEM_P (*loc));
+}
+
+/* Return true iff INSN contains a MEM. */
+bool
+sh_contains_memref_p (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
+}
+
+/* Return true iff INSN loads a banked register. */
+bool
+sh_loads_bankedreg_p (rtx insn)
+{
+ if (GET_CODE (PATTERN (insn)) == SET)
+ {
+ rtx op = SET_DEST (PATTERN(insn));
+ if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
+ return true;
+ }
+
+ return false;
+}
+
+/* FNADDR is the MEM expression from a call expander. Return an address
+ to use in an SHmedia insn pattern. */
+rtx
+shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
+{
+ int is_sym;
+
+ fnaddr = XEXP (fnaddr, 0);
+ is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
+ if (flag_pic && is_sym)
+ {
+ if (! SYMBOL_REF_LOCAL_P (fnaddr))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ /* We must not use GOTPLT for sibcalls, because PIC_REG
+ must be restored before the PLT code gets to run. */
+ if (is_sibcall)
+ emit_insn (gen_symGOT2reg (reg, fnaddr));
+ else
+ emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
+ fnaddr = reg;
+ }
+ else
+ {
+ fnaddr = gen_sym2PIC (fnaddr);
+ PUT_MODE (fnaddr, Pmode);
+ }
+ }
+ /* If ptabs might trap, make this visible to the rest of the compiler.
+ We generally assume that symbols pertain to valid locations, but
+ it is possible to generate invalid symbols with asm or linker tricks.
+ In a list of functions where each returns its successor, an invalid
+ symbol might denote an empty list. */
+ if (!TARGET_PT_FIXED
+ && (!is_sym || TARGET_INVALID_SYMBOLS)
+ && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
+ {
+ rtx tr = gen_reg_rtx (PDImode);
+
+ emit_insn (gen_ptabs (tr, fnaddr));
+ fnaddr = tr;
+ }
+ else if (! target_reg_operand (fnaddr, Pmode))
+ fnaddr = copy_to_mode_reg (Pmode, fnaddr);
+ return fnaddr;
+}
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS. */
+static reg_class_t
+sh_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+ if (rclass == NO_REGS
+ && TARGET_SHMEDIA
+ && (CONST_DOUBLE_P (x)
+ || GET_CODE (x) == SYMBOL_REF
+ || PIC_ADDR_P (x)))
+ return GENERAL_REGS;
+
+ return rclass;
+}
+
+/* Implement TARGET_SECONDARY_RELOAD. */
+static reg_class_t
+sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
+ && REG_P (XEXP (XEXP (x, 0), 0))
+ && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
+ return rclass == R0_REGS ? NO_REGS : R0_REGS;
+
+ if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
+ return rclass == R0_REGS ? NO_REGS : R0_REGS;
+
+ if (REG_P (x) && REGNO (x) == GBR_REG)
+ return NO_REGS;
+
+ if (in_p)
+ {
+ if (REGCLASS_HAS_FP_REG (rclass)
+ && ! TARGET_SHMEDIA
+ && immediate_operand ((x), mode)
+ && ! ((fp_zero_operand (x) || fp_one_operand (x))
+ && mode == SFmode && fldi_ok ()))
+ switch (mode)
+ {
+ case SFmode:
+ sri->icode = CODE_FOR_reload_insf__frn;
+ return NO_REGS;
+ case DFmode:
+ sri->icode = CODE_FOR_reload_indf__frn;
+ return NO_REGS;
+ case SImode:
+ /* ??? If we knew that we are in the appropriate mode -
+ single precision - we could use a reload pattern directly. */
+ return FPUL_REGS;
+ default:
+ abort ();
+ }
+ if (rclass == FPUL_REGS
+ && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
+ || REGNO (x) == T_REG))
+ || GET_CODE (x) == PLUS))
+ return GENERAL_REGS;
+ if (rclass == FPUL_REGS && immediate_operand (x, mode))
+ {
+ if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
+ return GENERAL_REGS;
+ else if (mode == SFmode)
+ return FP_REGS;
+ sri->icode = CODE_FOR_reload_insi__i_fpul;
+ return NO_REGS;
+ }
+ if (rclass == FPSCR_REGS
+ && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
+ || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
+ return GENERAL_REGS;
+ if (REGCLASS_HAS_FP_REG (rclass)
+ && TARGET_SHMEDIA
+ && immediate_operand (x, mode)
+ && x != CONST0_RTX (GET_MODE (x))
+ && GET_MODE (x) != V4SFmode)
+ return GENERAL_REGS;
+ if ((mode == QImode || mode == HImode)
+ && TARGET_SHMEDIA && inqhi_operand (x, mode))
+ {
+ sri->icode = ((mode == QImode)
+ ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
+ return NO_REGS;
+ }
+ if (TARGET_SHMEDIA && rclass == GENERAL_REGS
+ && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
+ return TARGET_REGS;
+ } /* end of input-only processing. */
+
+ if (((REGCLASS_HAS_FP_REG (rclass)
+ && (REG_P (x)
+ && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
+ || (FP_REGISTER_P (REGNO (x)) && mode == SImode
+ && TARGET_FMOVD))))
+ || (REGCLASS_HAS_GENERAL_REG (rclass)
+ && REG_P (x)
+ && FP_REGISTER_P (REGNO (x))))
+ && ! TARGET_SHMEDIA
+ && (mode == SFmode || mode == SImode))
+ return FPUL_REGS;
+ if ((rclass == FPUL_REGS
+ || (REGCLASS_HAS_FP_REG (rclass)
+ && ! TARGET_SHMEDIA && mode == SImode))
+ && (MEM_P (x)
+ || (REG_P (x)
+ && (REGNO (x) >= FIRST_PSEUDO_REGISTER
+ || REGNO (x) == T_REG
+ || system_reg_operand (x, VOIDmode)))))
+ {
+ if (rclass == FPUL_REGS)
+ return GENERAL_REGS;
+ return FPUL_REGS;
+ }
+ if ((rclass == TARGET_REGS
+ || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
+ && !satisfies_constraint_Csy (x)
+ && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
+ return GENERAL_REGS;
+ if ((rclass == MAC_REGS || rclass == PR_REGS)
+ && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
+ && rclass != REGNO_REG_CLASS (REGNO (x)))
+ return GENERAL_REGS;
+ if (rclass != GENERAL_REGS && REG_P (x)
+ && TARGET_REGISTER_P (REGNO (x)))
+ return GENERAL_REGS;
+
+ /* If we get here, fall back to loading the FPUL register through general
+ registers.  This case can happen when the movsi_ie insn is picked
+ initially to load/store the FPUL register from/to another register,
+ and then the other register is allocated on the stack. */
+ if (rclass == FPUL_REGS && true_regnum (x) == -1)
+ return GENERAL_REGS;
+
+ /* Force mov.b / mov.w displacement addressing insn to use R0 as
+ the other operand.
+ On SH2A we could also just leave it alone here, which would result in a
+ 4 byte move insn being generated instead. However, for this to work
+ the insns must have the appropriate alternatives. */
+ if ((mode == QImode || mode == HImode) && rclass != R0_REGS
+ && satisfies_constraint_Sdd (x)
+ && sh_disp_addr_displacement (x)
+ <= sh_max_mov_insn_displacement (mode, false))
+ return R0_REGS;
+
+ /* When reload is trying to address a QImode or HImode subreg on the stack,
+ force any subreg byte into R0_REGS, as this is going to become a
+ displacement address.
+ We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
+ is on the stack, the memref to it might already require a displacement
+ and that has to be added to the final address. At this point we don't
+ know the cumulative displacement so we assume the worst case. */
+ if ((mode == QImode || mode == HImode) && rclass != R0_REGS
+ && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
+ return R0_REGS;
+
+ return NO_REGS;
+}
+
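+/* Adjust the fixed / call-used register sets according to the selected
+   target flags (the TARGET_CONDITIONAL_REGISTER_USAGE hook).  */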
+static void
+sh_conditional_register_usage (void)
+{
+ int regno;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
+ if (! VALID_REGISTER_P (regno))
+ fixed_regs[regno] = call_used_regs[regno] = 1;
+ /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
+ if (TARGET_SH5)
+ {
+ call_used_regs[FIRST_GENERAL_REG + 8]
+ = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
+ call_really_used_regs[FIRST_GENERAL_REG + 8]
+ = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
+ }
+ if (TARGET_SHMEDIA)
+ {
+ regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
+ CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
+ regno_reg_class[FIRST_FP_REG] = FP_REGS;
+ }
+ if (flag_pic)
+ {
+ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+ }
+ /* Renesas saves and restores mac registers on call. */
+ if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
+ {
+ call_really_used_regs[MACH_REG] = 0;
+ call_really_used_regs[MACL_REG] = 0;
+ }
+
+ if (TARGET_SHMEDIA)
+ {
+ for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
+ if (! fixed_regs[regno] && call_really_used_regs[regno])
+ SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
+ }
+ else
+ for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
+ if (! fixed_regs[regno] && call_really_used_regs[regno])
+ SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
+}
+
+/* Implement TARGET_LEGITIMATE_CONSTANT_P
+
+ can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
+static bool
+sh_legitimate_constant_p (enum machine_mode mode, rtx x)
+{
+ return (TARGET_SHMEDIA
+ ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
+ || x == CONST0_RTX (mode)
+ || !TARGET_SHMEDIA_FPU
+ || TARGET_SHMEDIA64)
+ : (GET_CODE (x) != CONST_DOUBLE
+ || mode == DFmode || mode == SFmode
+ || mode == DImode || GET_MODE (x) == VOIDmode));
+}
+
+enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
+
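+/* Register the out-of-line __sync_* libcalls for operations up to word
+   size.  */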
+static void
+sh_init_sync_libfuncs (void)
+{
+ init_sync_libfuncs (UNITS_PER_WORD);
+}
+
+/* Return true if it is appropriate to emit `ret' instructions in the
+ body of a function. */
+bool
+sh_can_use_simple_return_p (void)
+{
+ HARD_REG_SET live_regs_mask;
+ int d;
+
+ /* Some targets require special return insns. */
+ if (TARGET_SHMEDIA
+ || (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
+ return false;
+
+ if (! reload_completed || frame_pointer_needed)
+ return false;
+
+ /* Moving the prologue around doesn't reduce the size. */
+ if (optimize_function_for_size_p (cfun))
+ return false;
+
+ /* Finally, allow for pr save. */
+ d = calc_live_regs (&live_regs_mask);
+
+ if (rounded_frame_size (d) > 4)
+ return false;
+
+ return true;
+}
+
+/*------------------------------------------------------------------------------
+ Address mode optimization support code
+*/
+
+typedef HOST_WIDE_INT disp_t;
+static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
+static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
+static const disp_t INVALID_DISP = MAX_DISP;
+
+/* A memory reference which is described by a base register and a
+ displacement. */
+class base_reg_disp
+{
+public:
+ base_reg_disp (rtx br, disp_t d);
+
+ bool is_reg (void) const;
+ bool is_disp (void) const;
+ rtx reg (void) const;
+ disp_t disp (void) const;
+
+private:
+ rtx reg_;
+ disp_t disp_;
+};
+
+inline
+base_reg_disp::base_reg_disp (rtx br, disp_t d)
+: reg_ (br), disp_ (d)
+{
+}
+
+inline bool
+base_reg_disp::is_reg (void) const
+{
+ return reg_ != NULL_RTX && disp_ != INVALID_DISP;
+}
+
+inline bool
+base_reg_disp::is_disp (void) const
+{
+ return reg_ == NULL_RTX && disp_ != INVALID_DISP;
+}
+
+inline rtx
+base_reg_disp::reg (void) const
+{
+ return reg_;
+}
+
+inline disp_t
+base_reg_disp::disp (void) const
+{
+ return disp_;
+}
+
+/* Find the base register and calculate the displacement for a given
+ address rtx 'x'.
+ This is done by walking the insn list backwards and following SET insns
+ that set the value of the specified reg 'x'. */
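+/* For example, for a (hypothetical) sequence such as
+     (set (reg 300) (reg GBR))
+     (set (reg 301) (plus (reg 300) (const_int 40)))
+   a query for the address (reg 301) yields the base reg GBR and the
+   displacement 40.  */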
+static base_reg_disp
+sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
+{
+ if (REG_P (x))
+ {
+ if (REGNO (x) == GBR_REG)
+ return base_reg_disp (x, disp);
+
+ /* We've reached a hard-reg. This is probably the point where
+ function args are copied to pseudos. Do not go any further and
+ stick to the pseudo. If the original mem addr was in a hard reg
+ from the beginning, it will become the base reg. */
+ if (REGNO (x) < FIRST_PSEUDO_REGISTER)
+ return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
+
+ /* Try to find the previous insn that sets the reg. */
+ for (rtx i = prev_nonnote_insn (insn); i != NULL;
+ i = prev_nonnote_insn (i))
+ {
+ if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
+ && CALL_P (i))
+ break;
+
+ if (!NONJUMP_INSN_P (i))
+ continue;
+
+ rtx p = PATTERN (i);
+ if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
+ && REGNO (XEXP (p, 0)) == REGNO (x))
+ {
+ /* If the recursion can't find out any more details about the
+ source of the set, then this reg becomes our new base reg. */
+ return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
+ }
+ }
+
+ /* If we get here, no previous insn was found that sets the reg.
+ The input reg is already the base reg. */
+ return base_reg_disp (x, disp);
+ }
+
+ else if (GET_CODE (x) == PLUS)
+ {
+ base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
+ base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
+
+ /* Either left or right val must be a reg.
+ We don't handle the case of 'reg + reg' here. */
+ if (left_val.is_reg () && right_val.is_disp ())
+ return base_reg_disp (left_val.reg (), left_val.disp ()
+ + right_val.disp () + disp);
+ else if (right_val.is_reg () && left_val.is_disp ())
+ return base_reg_disp (right_val.reg (), right_val.disp ()
+ + left_val.disp () + disp);
+ else
+ return base_reg_disp (base_reg, disp);
+ }
+
+ else if (CONST_INT_P (x))
+ return base_reg_disp (NULL, disp + INTVAL (x));
+
+ /* Didn't find anything useful. */
+ return base_reg_disp (base_reg, disp);
+}
+
+/* Given an insn and a memory operand, try to find an equivalent GBR
+ based memory address and return the corresponding new memory address.
+ Return NULL_RTX if not found. */
+rtx
+sh_find_equiv_gbr_addr (rtx insn, rtx mem)
+{
+ if (!MEM_P (mem))
+ return NULL_RTX;
+
+ /* Leave post/pre inc/dec or any other side effect addresses alone. */
+ if (side_effects_p (XEXP (mem, 0)))
+ return NULL_RTX;
+
+ base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
+
+ if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
+ {
+ rtx disp = GEN_INT (gbr_disp.disp ());
+ if (gbr_displacement (disp, GET_MODE (mem)))
+ return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
+ }
+
+ return NULL_RTX;
+}
+
+/*------------------------------------------------------------------------------
+ Manual insn combine support code.
+*/
+
+/* Given a reg rtx and a start insn, try to find the insn that sets the
+ specified reg by using the specified insn stepping function, such as
+ 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
+ of the reg set. */
+set_of_reg
+sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
+{
+ set_of_reg result;
+ result.insn = insn;
+ result.set_rtx = NULL_RTX;
+ result.set_src = NULL_RTX;
+
+ if (!REG_P (reg) || insn == NULL_RTX)
+ return result;
+
+ for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
+ result.insn = stepfunc (result.insn))
+ {
+ if (BARRIER_P (result.insn))
+ return result;
+ if (!NONJUMP_INSN_P (result.insn))
+ continue;
+ if (reg_set_p (reg, result.insn))
+ {
+ result.set_rtx = set_of (reg, result.insn);
+
+ if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
+ return result;
+
+ result.set_src = XEXP (result.set_rtx, 1);
+ return result;
+ }
+ }
+
+ return result;
+}
+
+/* Given an op rtx and an insn, try to find out whether the result of the
+ specified op consists only of logical operations on T bit stores. */
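+/* For example, an op such as (ior (reg A) (reg B)) qualifies when both A
+   and B were themselves set from the T bit, or from expressions that
+   satisfy this check recursively.  */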
+bool
+sh_is_logical_t_store_expr (rtx op, rtx insn)
+{
+ if (!logical_operator (op, SImode))
+ return false;
+
+ rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
+ int op_is_t_count = 0;
+
+ for (int i = 0; i < 2; ++i)
+ {
+ if (t_reg_operand (ops[i], VOIDmode)
+ || negt_reg_operand (ops[i], VOIDmode))
+ op_is_t_count++;
+
+ else
+ {
+ set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
+ prev_nonnote_insn_bb);
+ if (op_set.set_src == NULL_RTX)
+ continue;
+
+ if (t_reg_operand (op_set.set_src, VOIDmode)
+ || negt_reg_operand (op_set.set_src, VOIDmode)
+ || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
+ op_is_t_count++;
+ }
+ }
+
+ return op_is_t_count == 2;
+}
+
+/* Given the operand that is extended in a sign/zero extend insn, and the
+ insn, try to figure out whether the sign/zero extension can be replaced
+ by a simple reg-reg copy. If so, the replacement reg rtx is returned,
+ NULL_RTX otherwise. */
+rtx
+sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
+{
+ if (REG_P (extended_op))
+ extended_op = extended_op;
+ else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
+ extended_op = SUBREG_REG (extended_op);
+ else
+ return NULL_RTX;
+
+ /* Reg moves must be of the same mode. */
+ if (GET_MODE (extended_op) != SImode)
+ return NULL_RTX;
+
+ set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
+ if (s.set_src == NULL_RTX)
+ return NULL_RTX;
+
+ if (t_reg_operand (s.set_src, VOIDmode)
+ || negt_reg_operand (s.set_src, VOIDmode))
+ return extended_op;
+
+ /* If the zero extended reg was formed by a logical operation, check the
+ operands of the logical operation. If both originated from T bit
+ stores the zero extension can be eliminated. */
+ else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
+ return extended_op;
+
+ return NULL_RTX;
+}
+
+#include "gt-sh.h"
diff --git a/gcc-4.9/gcc/config/sh/sh.h b/gcc-4.9/gcc/config/sh/sh.h
new file mode 100644
index 000000000..881930011
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh.h
@@ -0,0 +1,2311 @@
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH.
+ Copyright (C) 1993-2014 Free Software Foundation, Inc.
+ Contributed by Steve Chamberlain (sac@cygnus.com).
+ Improved by Jim Wilson (wilson@cygnus.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_SH_H
+#define GCC_SH_H
+
+#include "config/vxworks-dummy.h"
+
+/* Unfortunately, insn-attrtab.c doesn't include insn-codes.h. We can't
+ include it here, because bconfig.h is also included by gencodes.c . */
+/* ??? No longer true. */
+extern int code_for_indirect_jump_scratch;
+
+#define TARGET_CPU_CPP_BUILTINS() sh_cpu_cpp_builtins (pfile)
+
+/* Value should be nonzero if functions must have frame pointers.
+ Zero means the frame pointer need not be set up (and parms may be accessed
+ via the stack pointer) in functions that seem suitable. */
+
+#ifndef SUBTARGET_FRAME_POINTER_REQUIRED
+#define SUBTARGET_FRAME_POINTER_REQUIRED 0
+#endif
+
+
+/* Nonzero if this is an ELF target - compile time only */
+#define TARGET_ELF 0
+
+/* Nonzero if we should generate code using type 2E insns. */
+#define TARGET_SH2E (TARGET_SH2 && TARGET_SH_E)
+
+/* Nonzero if we should generate code using type 2A insns. */
+#define TARGET_SH2A TARGET_HARD_SH2A
+/* Nonzero if we should generate code using type 2A SF insns. */
+#define TARGET_SH2A_SINGLE (TARGET_SH2A && TARGET_SH2E)
+/* Nonzero if we should generate code using type 2A DF insns. */
+#define TARGET_SH2A_DOUBLE (TARGET_HARD_SH2A_DOUBLE && TARGET_SH2A)
+
+/* Nonzero if we should generate code using type 3E insns. */
+#define TARGET_SH3E (TARGET_SH3 && TARGET_SH_E)
+
+/* Nonzero if we schedule for a superscalar implementation. */
+#define TARGET_SUPERSCALAR (TARGET_HARD_SH4 || TARGET_SH2A)
+
+/* Nonzero if a double-precision FPU is available. */
+#define TARGET_FPU_DOUBLE \
+ ((target_flags & MASK_SH4) != 0 || TARGET_SH2A_DOUBLE)
+
+/* Nonzero if an FPU is available. */
+#define TARGET_FPU_ANY (TARGET_SH2E || TARGET_FPU_DOUBLE)
+
+/* Nonzero if we should generate code using type 4 insns. */
+#undef TARGET_SH4
+#define TARGET_SH4 ((target_flags & MASK_SH4) != 0 && TARGET_SH1)
+
+/* Nonzero if we're generating code for the common subset of
+ instructions present on both SH4a and SH4al-dsp. */
+#define TARGET_SH4A_ARCH TARGET_SH4A
+
+/* Nonzero if we're generating code for SH4a, unless the use of the
+ FPU is disabled (which makes it compatible with SH4al-dsp). */
+#define TARGET_SH4A_FP (TARGET_SH4A_ARCH && TARGET_FPU_ANY)
+
+/* Nonzero if we should generate code using the SHcompact instruction
+ set and 32-bit ABI. */
+#define TARGET_SHCOMPACT (TARGET_SH5 && TARGET_SH1)
+
+/* Nonzero if we should generate code using the SHmedia instruction
+ set and ABI. */
+#define TARGET_SHMEDIA (TARGET_SH5 && ! TARGET_SH1)
+
+/* Nonzero if we should generate code using the SHmedia ISA and 32-bit
+ ABI. */
+#define TARGET_SHMEDIA32 (TARGET_SH5 && ! TARGET_SH1 && TARGET_SH_E)
+
+/* Nonzero if we should generate code using the SHmedia ISA and 64-bit
+ ABI. */
+#define TARGET_SHMEDIA64 (TARGET_SH5 && ! TARGET_SH1 && ! TARGET_SH_E)
+
+/* Nonzero if we should generate code using SHmedia FPU instructions. */
+#define TARGET_SHMEDIA_FPU (TARGET_SHMEDIA && TARGET_FPU_DOUBLE)
+
+/* This is not used by the SH2E calling convention */
+#define TARGET_VARARGS_PRETEND_ARGS(FUN_DECL) \
+ (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5 \
+ && ! (TARGET_HITACHI || sh_attr_renesas_p (FUN_DECL)))
+
+#ifndef TARGET_CPU_DEFAULT
+#define TARGET_CPU_DEFAULT SELECT_SH1
+#define SUPPORT_SH1 1
+#define SUPPORT_SH2E 1
+#define SUPPORT_SH4 1
+#define SUPPORT_SH4_SINGLE 1
+#define SUPPORT_SH2A 1
+#define SUPPORT_SH2A_SINGLE 1
+#endif
+
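+/* Convenience tests for the division strategy selected in
+   sh_div_strategy.  */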
+#define TARGET_DIVIDE_INV \
+ (sh_div_strategy == SH_DIV_INV || sh_div_strategy == SH_DIV_INV_MINLAT \
+ || sh_div_strategy == SH_DIV_INV20U || sh_div_strategy == SH_DIV_INV20L \
+ || sh_div_strategy == SH_DIV_INV_CALL \
+ || sh_div_strategy == SH_DIV_INV_CALL2 || sh_div_strategy == SH_DIV_INV_FP)
+#define TARGET_DIVIDE_FP (sh_div_strategy == SH_DIV_FP)
+#define TARGET_DIVIDE_INV_FP (sh_div_strategy == SH_DIV_INV_FP)
+#define TARGET_DIVIDE_CALL2 (sh_div_strategy == SH_DIV_CALL2)
+#define TARGET_DIVIDE_INV_MINLAT (sh_div_strategy == SH_DIV_INV_MINLAT)
+#define TARGET_DIVIDE_INV20U (sh_div_strategy == SH_DIV_INV20U)
+#define TARGET_DIVIDE_INV20L (sh_div_strategy == SH_DIV_INV20L)
+#define TARGET_DIVIDE_INV_CALL (sh_div_strategy == SH_DIV_INV_CALL)
+#define TARGET_DIVIDE_INV_CALL2 (sh_div_strategy == SH_DIV_INV_CALL2)
+#define TARGET_DIVIDE_CALL_DIV1 (sh_div_strategy == SH_DIV_CALL_DIV1)
+#define TARGET_DIVIDE_CALL_FP (sh_div_strategy == SH_DIV_CALL_FP)
+#define TARGET_DIVIDE_CALL_TABLE (sh_div_strategy == SH_DIV_CALL_TABLE)
+
+#define SELECT_SH1 (MASK_SH1)
+#define SELECT_SH2 (MASK_SH2 | SELECT_SH1)
+#define SELECT_SH2E (MASK_SH_E | MASK_SH2 | MASK_SH1 \
+ | MASK_FPU_SINGLE)
+#define SELECT_SH2A (MASK_SH_E | MASK_HARD_SH2A \
+ | MASK_HARD_SH2A_DOUBLE \
+ | MASK_SH2 | MASK_SH1)
+#define SELECT_SH2A_NOFPU (MASK_HARD_SH2A | MASK_SH2 | MASK_SH1)
+#define SELECT_SH2A_SINGLE_ONLY (MASK_SH_E | MASK_HARD_SH2A | MASK_SH2 \
+ | MASK_SH1 | MASK_FPU_SINGLE \
+ | MASK_FPU_SINGLE_ONLY)
+#define SELECT_SH2A_SINGLE (MASK_SH_E | MASK_HARD_SH2A \
+ | MASK_FPU_SINGLE | MASK_HARD_SH2A_DOUBLE \
+ | MASK_SH2 | MASK_SH1)
+#define SELECT_SH3 (MASK_SH3 | SELECT_SH2)
+#define SELECT_SH3E (MASK_SH_E | MASK_FPU_SINGLE | SELECT_SH3)
+#define SELECT_SH4_NOFPU (MASK_HARD_SH4 | SELECT_SH3)
+#define SELECT_SH4_SINGLE_ONLY (MASK_HARD_SH4 | SELECT_SH3E \
+ | MASK_FPU_SINGLE_ONLY)
+#define SELECT_SH4 (MASK_SH4 | MASK_SH_E | MASK_HARD_SH4 \
+ | SELECT_SH3)
+#define SELECT_SH4_SINGLE (MASK_FPU_SINGLE | SELECT_SH4)
+#define SELECT_SH4A_NOFPU (MASK_SH4A | SELECT_SH4_NOFPU)
+#define SELECT_SH4A_SINGLE_ONLY (MASK_SH4A | SELECT_SH4_SINGLE_ONLY)
+#define SELECT_SH4A (MASK_SH4A | SELECT_SH4)
+#define SELECT_SH4A_SINGLE (MASK_SH4A | SELECT_SH4_SINGLE)
+#define SELECT_SH5_64MEDIA (MASK_SH5 | MASK_SH4)
+#define SELECT_SH5_64MEDIA_NOFPU (MASK_SH5)
+#define SELECT_SH5_32MEDIA (MASK_SH5 | MASK_SH4 | MASK_SH_E)
+#define SELECT_SH5_32MEDIA_NOFPU (MASK_SH5 | MASK_SH_E)
+#define SELECT_SH5_COMPACT (MASK_SH5 | MASK_SH4 | SELECT_SH3E)
+#define SELECT_SH5_COMPACT_NOFPU (MASK_SH5 | SELECT_SH3)
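+
+/* Illustrative note (not part of the original header): the SELECT_* macros
+   nest, so a single CPU selection expands into the union of all the ISA
+   feature bits it implies.  For example, expanding the definitions above,
+   SELECT_SH4A_SINGLE becomes
+     MASK_SH4A | MASK_FPU_SINGLE | MASK_SH4 | MASK_SH_E | MASK_HARD_SH4
+     | MASK_SH3 | MASK_SH2 | MASK_SH1
+   which is why coarser feature tests (e.g. on the SH2 or SH3 bits) remain
+   true when compiling for the newer CPUs.  */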
+
+#if SUPPORT_SH1
+#define SUPPORT_SH2 1
+#endif
+#if SUPPORT_SH2
+#define SUPPORT_SH3 1
+#define SUPPORT_SH2A_NOFPU 1
+#endif
+#if SUPPORT_SH3
+#define SUPPORT_SH4_NOFPU 1
+#endif
+#if SUPPORT_SH4_NOFPU
+#define SUPPORT_SH4A_NOFPU 1
+#define SUPPORT_SH4AL 1
+#endif
+
+#if SUPPORT_SH2E
+#define SUPPORT_SH3E 1
+#define SUPPORT_SH2A_SINGLE_ONLY 1
+#endif
+#if SUPPORT_SH3E
+#define SUPPORT_SH4_SINGLE_ONLY 1
+#endif
+#if SUPPORT_SH4_SINGLE_ONLY
+#define SUPPORT_SH4A_SINGLE_ONLY 1
+#endif
+
+#if SUPPORT_SH4
+#define SUPPORT_SH4A 1
+#endif
+
+#if SUPPORT_SH4_SINGLE
+#define SUPPORT_SH4A_SINGLE 1
+#endif
+
+#if SUPPORT_SH5_COMPACT
+#define SUPPORT_SH5_32MEDIA 1
+#endif
+
+#if SUPPORT_SH5_COMPACT_NOFPU
+#define SUPPORT_SH5_32MEDIA_NOFPU 1
+#endif
+
+#define SUPPORT_ANY_SH5_32MEDIA \
+ (SUPPORT_SH5_32MEDIA || SUPPORT_SH5_32MEDIA_NOFPU)
+#define SUPPORT_ANY_SH5_64MEDIA \
+ (SUPPORT_SH5_64MEDIA || SUPPORT_SH5_64MEDIA_NOFPU)
+#define SUPPORT_ANY_SH5 \
+ (SUPPORT_ANY_SH5_32MEDIA || SUPPORT_ANY_SH5_64MEDIA)
+
+/* Reset all target-selection flags. */
+#define MASK_ARCH (MASK_SH1 | MASK_SH2 | MASK_SH3 | MASK_SH_E | MASK_SH4 \
+ | MASK_HARD_SH2A | MASK_HARD_SH2A_DOUBLE | MASK_SH4A \
+ | MASK_HARD_SH4 | MASK_FPU_SINGLE | MASK_SH5 \
+ | MASK_FPU_SINGLE_ONLY)
+
+/* This defaults us to big-endian. */
+#ifndef TARGET_ENDIAN_DEFAULT
+#define TARGET_ENDIAN_DEFAULT 0
+#endif
+
+#ifndef TARGET_OPT_DEFAULT
+#define TARGET_OPT_DEFAULT 0
+#endif
+
+#define TARGET_DEFAULT \
+ (TARGET_CPU_DEFAULT | TARGET_ENDIAN_DEFAULT | TARGET_OPT_DEFAULT)
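+
+/* Illustrative note (not part of the original header): with none of the
+   overriding macros defined, TARGET_CPU_DEFAULT is SELECT_SH1 (== MASK_SH1)
+   and both TARGET_ENDIAN_DEFAULT and TARGET_OPT_DEFAULT are 0, so
+   TARGET_DEFAULT reduces to plain MASK_SH1, i.e. a big-endian SH1 compiler
+   with no extra options enabled by default.  */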
+
+#ifndef SH_MULTILIB_CPU_DEFAULT
+#define SH_MULTILIB_CPU_DEFAULT "m1"
+#endif
+
+#if TARGET_ENDIAN_DEFAULT
+#define MULTILIB_DEFAULTS { "ml", SH_MULTILIB_CPU_DEFAULT }
+#else
+#define MULTILIB_DEFAULTS { "mb", SH_MULTILIB_CPU_DEFAULT }
+#endif
+
+#define CPP_SPEC " %(subtarget_cpp_spec) "
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#ifndef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS
+#endif
+
+#define EXTRA_SPECS \
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }, \
+ { "link_emul_prefix", LINK_EMUL_PREFIX }, \
+ { "link_default_cpu_emul", LINK_DEFAULT_CPU_EMUL }, \
+ { "subtarget_link_emul_suffix", SUBTARGET_LINK_EMUL_SUFFIX }, \
+ { "subtarget_link_spec", SUBTARGET_LINK_SPEC }, \
+ { "subtarget_asm_endian_spec", SUBTARGET_ASM_ENDIAN_SPEC }, \
+ { "subtarget_asm_relax_spec", SUBTARGET_ASM_RELAX_SPEC }, \
+ { "subtarget_asm_isa_spec", SUBTARGET_ASM_ISA_SPEC }, \
+ { "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \
+ SUBTARGET_EXTRA_SPECS
+
+#if TARGET_CPU_DEFAULT & MASK_HARD_SH4
+#define SUBTARGET_ASM_RELAX_SPEC "%{!m1:%{!m2:%{!m3*:%{!m5*:-isa=sh4-up}}}}"
+#else
+#define SUBTARGET_ASM_RELAX_SPEC "%{m4*:-isa=sh4-up}"
+#endif
+
+#define SH_ASM_SPEC \
+ "%(subtarget_asm_endian_spec) %{mrelax:-relax %(subtarget_asm_relax_spec)} \
+%(subtarget_asm_isa_spec) %(subtarget_asm_spec) \
+%{m2a:--isa=sh2a} \
+%{m2a-single:--isa=sh2a} \
+%{m2a-single-only:--isa=sh2a} \
+%{m2a-nofpu:--isa=sh2a-nofpu} \
+%{m5-compact*:--isa=SHcompact} \
+%{m5-32media*:--isa=SHmedia --abi=32} \
+%{m5-64media*:--isa=SHmedia --abi=64} \
+%{m4al:-dsp} %{mcut2-workaround:-cut2-workaround}"
+
+#define ASM_SPEC SH_ASM_SPEC
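+
+/* Illustrative note (not part of the original header): SH_ASM_SPEC is a
+   driver spec string, so each %{opt:...} fragment conditionally adds text
+   to the assembler command line.  For instance, a hypothetical invocation
+   'gcc -m2a-single -mrelax foo.c' would (roughly) make the driver pass
+   '--isa=sh2a' (from the m2a-single rule) together with '-relax' and the
+   contents of subtarget_asm_relax_spec to the assembler.  */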
+
+#ifndef SUBTARGET_ASM_ENDIAN_SPEC
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define SUBTARGET_ASM_ENDIAN_SPEC "%{mb:-big} %{!mb:-little}"
+#else
+#define SUBTARGET_ASM_ENDIAN_SPEC "%{ml:-little} %{!ml:-big}"
+#endif
+#endif
+
+#if STRICT_NOFPU == 1
+/* Strict nofpu means that the compiler should tell the assembler
+   to reject FPU instructions, e.g. ones coming from asm inserts.  */
+#if TARGET_CPU_DEFAULT & MASK_HARD_SH4 && !(TARGET_CPU_DEFAULT & MASK_SH_E)
+#define SUBTARGET_ASM_ISA_SPEC "%{!m1:%{!m2:%{!m3*:%{m4-nofpu|!m4*:%{!m5:-isa=sh4-nofpu}}}}}"
+#else
+/* If there were an -isa option for sh5-nofpu then it would also go here. */
+#define SUBTARGET_ASM_ISA_SPEC \
+ "%{m4-nofpu:-isa=sh4-nofpu} " ASM_ISA_DEFAULT_SPEC
+#endif
+#else /* ! STRICT_NOFPU */
+#define SUBTARGET_ASM_ISA_SPEC ASM_ISA_DEFAULT_SPEC
+#endif
+
+#ifndef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC ""
+#endif
+
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define LINK_EMUL_PREFIX "sh%{!mb:l}"
+#else
+#define LINK_EMUL_PREFIX "sh%{ml:l}"
+#endif
+
+#if TARGET_CPU_DEFAULT & MASK_SH5
+#if TARGET_CPU_DEFAULT & MASK_SH_E
+#define LINK_DEFAULT_CPU_EMUL "32"
+#if TARGET_CPU_DEFAULT & MASK_SH1
+#define ASM_ISA_SPEC_DEFAULT "--isa=SHcompact"
+#else
+#define ASM_ISA_SPEC_DEFAULT "--isa=SHmedia --abi=32"
+#endif /* MASK_SH1 */
+#else /* !MASK_SH_E */
+#define LINK_DEFAULT_CPU_EMUL "64"
+#define ASM_ISA_SPEC_DEFAULT "--isa=SHmedia --abi=64"
+#endif /* MASK_SH_E */
+#define ASM_ISA_DEFAULT_SPEC \
+" %{!m1:%{!m2*:%{!m3*:%{!m4*:%{!m5*:" ASM_ISA_SPEC_DEFAULT "}}}}}"
+#else /* !MASK_SH5 */
+#define LINK_DEFAULT_CPU_EMUL ""
+#define ASM_ISA_DEFAULT_SPEC ""
+#endif /* MASK_SH5 */
+
+#define SUBTARGET_LINK_EMUL_SUFFIX ""
+#define SUBTARGET_LINK_SPEC ""
+
+/* Go via SH_LINK_SPEC to avoid code replication. */
+#define LINK_SPEC SH_LINK_SPEC
+
+#define SH_LINK_SPEC "\
+-m %(link_emul_prefix)\
+%{m5-compact*|m5-32media*:32}\
+%{m5-64media*:64}\
+%{!m1:%{!m2:%{!m3*:%{!m4*:%{!m5*:%(link_default_cpu_emul)}}}}}\
+%(subtarget_link_emul_suffix) \
+%{mrelax:-relax} %(subtarget_link_spec)"
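+
+/* Illustrative note (not part of the original header): SH_LINK_SPEC picks
+   the linker emulation from the endianness and CPU options.  As a rough
+   sketch, with the big-endian default above, 'gcc -ml -m5-32media ...'
+   would pass '-m shl32' to the linker (LINK_EMUL_PREFIX yields "shl" and
+   the m5-32media rule appends "32"), while a plain invocation falls back
+   to %(link_default_cpu_emul); subtargets may append further suffixes via
+   subtarget_link_emul_suffix.  */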
+
+#ifndef SH_DIV_STR_FOR_SIZE
+#define SH_DIV_STR_FOR_SIZE "call"
+#endif
+
+/* SH2A does not support little-endian. Catch such combinations
+ taking into account the default configuration. */
+#if TARGET_ENDIAN_DEFAULT == MASK_BIG_ENDIAN
+#define IS_LITTLE_ENDIAN_OPTION "%{ml:"
+#else
+#define IS_LITTLE_ENDIAN_OPTION "%{!mb:"
+#endif
+
+#if TARGET_CPU_DEFAULT & MASK_HARD_SH2A
+#define UNSUPPORTED_SH2A IS_LITTLE_ENDIAN_OPTION \
+"%{m2a*|!m1:%{!m2*:%{!m3*:%{!m4*:{!m5*:%eSH2a does not support little-endian}}}}}}"
+#else
+#define UNSUPPORTED_SH2A IS_LITTLE_ENDIAN_OPTION \
+"%{m2a*:%eSH2a does not support little-endian}}"
+#endif
+
+#undef DRIVER_SELF_SPECS
+#define DRIVER_SELF_SPECS UNSUPPORTED_SH2A
+
+#define ASSEMBLER_DIALECT assembler_dialect
+
+extern int assembler_dialect;
+
+enum sh_divide_strategy_e {
+ /* SH5 strategies. */
+ SH_DIV_CALL,
+ SH_DIV_CALL2,
+ SH_DIV_FP, /* We could do this also for SH4. */
+ SH_DIV_INV,
+ SH_DIV_INV_MINLAT,
+ SH_DIV_INV20U,
+ SH_DIV_INV20L,
+ SH_DIV_INV_CALL,
+ SH_DIV_INV_CALL2,
+ SH_DIV_INV_FP,
+ /* SH1 .. SH4 strategies. Because of the small number of registers
+ available, the compiler uses knowledge of the actual set of registers
+ being clobbered by the different functions called. */
+ SH_DIV_CALL_DIV1, /* No FPU, medium size, highest latency. */
+ SH_DIV_CALL_FP, /* FPU needed, small size, high latency. */
+ SH_DIV_CALL_TABLE, /* No FPU, large size, medium latency. */
+ SH_DIV_INTRINSIC
+};
+
+extern enum sh_divide_strategy_e sh_div_strategy;
+
+#ifndef SH_DIV_STRATEGY_DEFAULT
+#define SH_DIV_STRATEGY_DEFAULT SH_DIV_CALL
+#endif
+
+#define SUBTARGET_OVERRIDE_OPTIONS (void) 0
+
+
+/* Target machine storage layout. */
+
+#define TARGET_BIG_ENDIAN (!TARGET_LITTLE_ENDIAN)
+
+#define SH_REG_MSW_OFFSET (TARGET_LITTLE_ENDIAN ? 1 : 0)
+#define SH_REG_LSW_OFFSET (TARGET_LITTLE_ENDIAN ? 0 : 1)
+
+/* Define this if most significant bit is lowest numbered
+ in instructions that operate on numbered bit-fields. */
+#define BITS_BIG_ENDIAN 0
+
+/* Define this if most significant byte of a word is the lowest numbered. */
+#define BYTES_BIG_ENDIAN TARGET_BIG_ENDIAN
+
+/* Define this if most significant word of a multiword number is the lowest
+ numbered. */
+#define WORDS_BIG_ENDIAN TARGET_BIG_ENDIAN
+
+#define MAX_BITS_PER_WORD 64
+
+/* Width in bits of an `int'.  We want just 32 bits, even if words are
+ longer. */
+#define INT_TYPE_SIZE 32
+
+/* Width in bits of a `long'. */
+#define LONG_TYPE_SIZE (TARGET_SHMEDIA64 ? 64 : 32)
+
+/* Width in bits of a `long long'. */
+#define LONG_LONG_TYPE_SIZE 64
+
+/* Width in bits of a `long double'. */
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Width of a word, in units (bytes). */
+#define UNITS_PER_WORD (TARGET_SHMEDIA ? 8 : 4)
+#define MIN_UNITS_PER_WORD 4
+
+/* Scaling factor for Dwarf data offsets for CFI information.
+ The dwarf2out.c default would use -UNITS_PER_WORD, which is -8 for
+ SHmedia; however, since we do partial register saves for the registers
+ visible to SHcompact, and for target registers for SHMEDIA32, we have
+ to allow saves that are only 4-byte aligned. */
+#define DWARF_CIE_DATA_ALIGNMENT -4
+
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+#define POINTER_SIZE (TARGET_SHMEDIA64 ? 64 : 32)
+
+/* Allocation boundary (in *bits*) for storing arguments in argument list. */
+#define PARM_BOUNDARY (TARGET_SH5 ? 64 : 32)
+
+/* Boundary (in *bits*) on which stack pointer should be aligned. */
+#define STACK_BOUNDARY BIGGEST_ALIGNMENT
+
+/* The log (base 2) of the cache line size, in bytes. Processors prior to
+ SH2 have no actual cache, but they fetch code in chunks of 4 bytes.
+   The SH2/3 have 16 byte cache lines, and the SH4 has a 32 byte cache line.  */
+#define CACHE_LOG ((TARGET_HARD_SH4 || TARGET_SH5) ? 5 : TARGET_SH2 ? 4 : 2)
+
+/* ABI given & required minimum allocation boundary (in *bits*) for the
+ code of a function. */
+#define FUNCTION_BOUNDARY (16 << TARGET_SHMEDIA)
+
+/* On SH5, the lowest bit is used to indicate SHmedia functions, so
+ the vbit must go into the delta field of
+ pointers-to-member-functions. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION \
+ (TARGET_SH5 ? ptrmemfunc_vbit_in_delta : ptrmemfunc_vbit_in_pfn)
+
+/* Alignment of field after `int : 0' in a structure. */
+#define EMPTY_FIELD_BOUNDARY 32
+
+/* No data type wants to be aligned rounder than this. */
+#define BIGGEST_ALIGNMENT (TARGET_ALIGN_DOUBLE ? 64 : 32)
+
+/* The best alignment to use in cases where we have a choice. */
+#define FASTEST_ALIGNMENT (TARGET_SH5 ? 64 : 32)
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
+#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
+ ((TREE_CODE (EXP) == STRING_CST \
+ && (ALIGN) < FASTEST_ALIGNMENT) \
+ ? FASTEST_ALIGNMENT : (ALIGN))
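+
+/* Illustrative note (not part of the original header): a 5-byte string
+   literal would normally only need byte alignment, but CONSTANT_ALIGNMENT
+   bumps it up to FASTEST_ALIGNMENT (32 bits here, 64 on SH5), so that
+   word-sized copies in strcpy-style loops start from an aligned address.  */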
+
+/* get_mode_alignment assumes complex values are always held in multiple
+ registers, but that is not the case on the SH; CQImode and CHImode are
+ held in a single integer register. SH5 also holds CSImode and SCmode
+ values in integer registers. This is relevant for argument passing on
+ SHcompact as we use a stack temp in order to pass CSImode by reference. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ ((GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_INT \
+ || GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_FLOAT) \
+ ? (unsigned) MIN (BIGGEST_ALIGNMENT, GET_MODE_BITSIZE (TYPE_MODE (TYPE))) \
+ : (unsigned) DATA_ALIGNMENT(TYPE, ALIGN))
+
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
+/* Number of bits which any structure or union's size must be a
+ multiple of. Each structure or union's size is rounded up to a
+ multiple of this. */
+#define STRUCTURE_SIZE_BOUNDARY (TARGET_PADSTRUCT ? 32 : 8)
+
+/* Set this nonzero if move instructions will actually fail to work
+ when given unaligned data. */
+#define STRICT_ALIGNMENT 1
+
+/* If LABEL_AFTER_BARRIER demands an alignment, return its base 2 logarithm. */
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL_AFTER_BARRIER) \
+ barrier_align (LABEL_AFTER_BARRIER)
+
+#define LOOP_ALIGN(A_LABEL) sh_loop_align (A_LABEL)
+
+#define LABEL_ALIGN(A_LABEL) \
+( \
+ (PREV_INSN (A_LABEL) \
+ && NONJUMP_INSN_P (PREV_INSN (A_LABEL)) \
+ && GET_CODE (PATTERN (PREV_INSN (A_LABEL))) == UNSPEC_VOLATILE \
+ && XINT (PATTERN (PREV_INSN (A_LABEL)), 1) == UNSPECV_ALIGN) \
+ /* explicit alignment insn in constant tables. */ \
+ ? INTVAL (XVECEXP (PATTERN (PREV_INSN (A_LABEL)), 0, 0)) \
+ : 0)
+
+/* Jump tables must be 32 bit aligned, no matter the size of the element. */
+#define ADDR_VEC_ALIGN(ADDR_VEC) 2
+
+/* The base two logarithm of the known minimum alignment of an insn length. */
+#define INSN_LENGTH_ALIGNMENT(A_INSN) \
+ (NONJUMP_INSN_P (A_INSN) \
+ ? 1 << TARGET_SHMEDIA \
+ : JUMP_P (A_INSN) || CALL_P (A_INSN) \
+ ? 1 << TARGET_SHMEDIA \
+ : CACHE_LOG)
+
+/* Standard register usage. */
+
+/* Register allocation for the Renesas calling convention:
+
+ r0 arg return
+ r1..r3 scratch
+ r4..r7 args in
+ r8..r13 call saved
+ r14 frame pointer/call saved
+ r15 stack pointer
+ ap arg pointer (doesn't really exist, always eliminated)
+ pr subroutine return address
+ t t bit
+ mach multiply/accumulate result, high part
+ macl multiply/accumulate result, low part.
+ fpul fp/int communication register
+ rap return address pointer register
+ fr0 fp arg return
+ fr1..fr3 scratch floating point registers
+ fr4..fr11 fp args in
+ fr12..fr15 call saved floating point registers */
+
+#define MAX_REGISTER_NAME_LENGTH 5
+extern char sh_register_names[][MAX_REGISTER_NAME_LENGTH + 1];
+
+#define SH_REGISTER_NAMES_INITIALIZER \
+{ \
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", \
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
+ "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
+ "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", \
+ "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \
+ "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", \
+ "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", \
+ "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31", \
+ "fr32", "fr33", "fr34", "fr35", "fr36", "fr37", "fr38", "fr39", \
+ "fr40", "fr41", "fr42", "fr43", "fr44", "fr45", "fr46", "fr47", \
+ "fr48", "fr49", "fr50", "fr51", "fr52", "fr53", "fr54", "fr55", \
+ "fr56", "fr57", "fr58", "fr59", "fr60", "fr61", "fr62", "fr63", \
+ "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7", \
+ "xd0", "xd2", "xd4", "xd6", "xd8", "xd10", "xd12", "xd14", \
+ "gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", \
+ "rap", "sfp" \
+}
+
+#define REGNAMES_ARR_INDEX_1(index) \
+ (sh_register_names[index])
+#define REGNAMES_ARR_INDEX_2(index) \
+ REGNAMES_ARR_INDEX_1 ((index)), REGNAMES_ARR_INDEX_1 ((index)+1)
+#define REGNAMES_ARR_INDEX_4(index) \
+ REGNAMES_ARR_INDEX_2 ((index)), REGNAMES_ARR_INDEX_2 ((index)+2)
+#define REGNAMES_ARR_INDEX_8(index) \
+ REGNAMES_ARR_INDEX_4 ((index)), REGNAMES_ARR_INDEX_4 ((index)+4)
+#define REGNAMES_ARR_INDEX_16(index) \
+ REGNAMES_ARR_INDEX_8 ((index)), REGNAMES_ARR_INDEX_8 ((index)+8)
+#define REGNAMES_ARR_INDEX_32(index) \
+ REGNAMES_ARR_INDEX_16 ((index)), REGNAMES_ARR_INDEX_16 ((index)+16)
+#define REGNAMES_ARR_INDEX_64(index) \
+ REGNAMES_ARR_INDEX_32 ((index)), REGNAMES_ARR_INDEX_32 ((index)+32)
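+
+/* Illustrative note (not part of the original header): these helpers simply
+   double up array references so that REGISTER_NAMES below can be written
+   compactly.  For example
+     REGNAMES_ARR_INDEX_4 (0)
+   expands to
+     sh_register_names[0], sh_register_names[1],
+     sh_register_names[2], sh_register_names[3]
+   and REGNAMES_ARR_INDEX_64 (0) covers r0..r63 in the same way.  */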
+
+#define REGISTER_NAMES \
+{ \
+ REGNAMES_ARR_INDEX_64 (0), \
+ REGNAMES_ARR_INDEX_64 (64), \
+ REGNAMES_ARR_INDEX_8 (128), \
+ REGNAMES_ARR_INDEX_8 (136), \
+ REGNAMES_ARR_INDEX_8 (144), \
+ REGNAMES_ARR_INDEX_2 (152) \
+}
+
+#define ADDREGNAMES_SIZE 32
+#define MAX_ADDITIONAL_REGISTER_NAME_LENGTH 4
+extern char sh_additional_register_names[ADDREGNAMES_SIZE] \
+ [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1];
+
+#define SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER \
+{ \
+ "dr0", "dr2", "dr4", "dr6", "dr8", "dr10", "dr12", "dr14", \
+ "dr16", "dr18", "dr20", "dr22", "dr24", "dr26", "dr28", "dr30", \
+ "dr32", "dr34", "dr36", "dr38", "dr40", "dr42", "dr44", "dr46", \
+ "dr48", "dr50", "dr52", "dr54", "dr56", "dr58", "dr60", "dr62" \
+}
+
+#define ADDREGNAMES_REGNO(index) \
+ ((index < 32) ? (FIRST_FP_REG + (index) * 2) \
+ : (-1))
+
+#define ADDREGNAMES_ARR_INDEX_1(index) \
+ { (sh_additional_register_names[index]), ADDREGNAMES_REGNO (index) }
+#define ADDREGNAMES_ARR_INDEX_2(index) \
+ ADDREGNAMES_ARR_INDEX_1 ((index)), ADDREGNAMES_ARR_INDEX_1 ((index)+1)
+#define ADDREGNAMES_ARR_INDEX_4(index) \
+ ADDREGNAMES_ARR_INDEX_2 ((index)), ADDREGNAMES_ARR_INDEX_2 ((index)+2)
+#define ADDREGNAMES_ARR_INDEX_8(index) \
+ ADDREGNAMES_ARR_INDEX_4 ((index)), ADDREGNAMES_ARR_INDEX_4 ((index)+4)
+#define ADDREGNAMES_ARR_INDEX_16(index) \
+ ADDREGNAMES_ARR_INDEX_8 ((index)), ADDREGNAMES_ARR_INDEX_8 ((index)+8)
+#define ADDREGNAMES_ARR_INDEX_32(index) \
+ ADDREGNAMES_ARR_INDEX_16 ((index)), ADDREGNAMES_ARR_INDEX_16 ((index)+16)
+
+#define ADDITIONAL_REGISTER_NAMES \
+{ \
+ ADDREGNAMES_ARR_INDEX_32 (0) \
+}
+
+/* Number of actual hardware registers.
+ The hardware registers are assigned numbers for the compiler
+ from 0 to just below FIRST_PSEUDO_REGISTER.
+ All registers that the compiler knows about must be given numbers,
+ even those that are not normally considered general registers. */
+
+/* There are many other relevant definitions in sh.md's md_constants. */
+
+#define FIRST_GENERAL_REG R0_REG
+#define LAST_GENERAL_REG (FIRST_GENERAL_REG + (TARGET_SHMEDIA ? 63 : 15))
+#define FIRST_FP_REG DR0_REG
+#define LAST_FP_REG (FIRST_FP_REG + \
+ (TARGET_SHMEDIA_FPU ? 63 : TARGET_SH2E ? 15 : -1))
+#define FIRST_XD_REG XD0_REG
+#define LAST_XD_REG (FIRST_XD_REG + ((TARGET_SH4 && TARGET_FMOVD) ? 7 : -1))
+#define FIRST_TARGET_REG TR0_REG
+#define LAST_TARGET_REG (FIRST_TARGET_REG + (TARGET_SHMEDIA ? 7 : -1))
+
+/* Registers that can be accessed through bank0 or bank1 depending on sr.md. */
+#define FIRST_BANKED_REG R0_REG
+#define LAST_BANKED_REG R7_REG
+
+#define BANKED_REGISTER_P(REGNO) \
+ IN_RANGE ((REGNO), \
+ (unsigned HOST_WIDE_INT) FIRST_BANKED_REG, \
+ (unsigned HOST_WIDE_INT) LAST_BANKED_REG)
+
+#define GENERAL_REGISTER_P(REGNO) \
+ IN_RANGE ((REGNO), \
+ (unsigned HOST_WIDE_INT) FIRST_GENERAL_REG, \
+ (unsigned HOST_WIDE_INT) LAST_GENERAL_REG)
+
+#define GENERAL_OR_AP_REGISTER_P(REGNO) \
+ (GENERAL_REGISTER_P (REGNO) || ((REGNO) == AP_REG) \
+ || ((REGNO) == FRAME_POINTER_REGNUM))
+
+#define FP_REGISTER_P(REGNO) \
+ ((int) (REGNO) >= FIRST_FP_REG && (int) (REGNO) <= LAST_FP_REG)
+
+#define XD_REGISTER_P(REGNO) \
+ ((int) (REGNO) >= FIRST_XD_REG && (int) (REGNO) <= LAST_XD_REG)
+
+#define FP_OR_XD_REGISTER_P(REGNO) \
+ (FP_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO))
+
+#define FP_ANY_REGISTER_P(REGNO) \
+ (FP_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO) || (REGNO) == FPUL_REG)
+
+#define SPECIAL_REGISTER_P(REGNO) \
+ ((REGNO) == GBR_REG || (REGNO) == T_REG \
+ || (REGNO) == MACH_REG || (REGNO) == MACL_REG)
+
+#define TARGET_REGISTER_P(REGNO) \
+ ((int) (REGNO) >= FIRST_TARGET_REG && (int) (REGNO) <= LAST_TARGET_REG)
+
+#define SHMEDIA_REGISTER_P(REGNO) \
+ (GENERAL_REGISTER_P (REGNO) || FP_REGISTER_P (REGNO) \
+ || TARGET_REGISTER_P (REGNO))
+
+/* This is to be used in TARGET_CONDITIONAL_REGISTER_USAGE, to mark
+ registers that should be fixed. */
+#define VALID_REGISTER_P(REGNO) \
+ (SHMEDIA_REGISTER_P (REGNO) || XD_REGISTER_P (REGNO) \
+ || (REGNO) == AP_REG || (REGNO) == RAP_REG \
+ || (REGNO) == FRAME_POINTER_REGNUM \
+ || (TARGET_SH1 && (SPECIAL_REGISTER_P (REGNO) || (REGNO) == PR_REG)) \
+ || (TARGET_SH2E && (REGNO) == FPUL_REG))
+
+/* The mode that should be generally used to store a register by
+ itself in the stack, or to load it back. */
+#define REGISTER_NATURAL_MODE(REGNO) \
+ (FP_REGISTER_P (REGNO) ? SFmode \
+ : XD_REGISTER_P (REGNO) ? DFmode \
+ : TARGET_SHMEDIA && ! HARD_REGNO_CALL_PART_CLOBBERED ((REGNO), DImode) \
+ ? DImode \
+ : SImode)
+
+#define FIRST_PSEUDO_REGISTER 154
+
+/* Don't count soft frame pointer. */
+#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 1)
+
+/* 1 for registers that have pervasive standard uses
+ and are not available for the register allocator.
+
+   The MACH register is fixed because it is only 10 bits wide for SH1.
+ It is 32 bits wide for SH2. */
+#define FIXED_REGISTERS \
+{ \
+/* Regular registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+ /* r16 is reserved, r18 is the former pr. */ \
+ 1, 0, 0, 0, 0, 0, 0, 0, \
+ /* r24 is reserved for the OS; r25, for the assembler or linker. */ \
+ /* r26 is a global variable data pointer; r27 is for constants. */ \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+/* FP registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* Branch target registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* XD registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/*"gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", */ \
+ 1, 1, 1, 1, 1, 1, 0, 1, \
+/*"rap", "sfp" */ \
+ 1, 1, \
+}
+
+/* 1 for registers not available across function calls.
+ These must include the FIXED_REGISTERS and also any
+ registers that can be used without being saved.
+ The latter must include the registers where values are returned
+ and the register where structure-value addresses are passed.
+ Aside from that, you can include as many other registers as you like. */
+#define CALL_USED_REGISTERS \
+{ \
+/* Regular registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. \
+     Only the lower 32 bits of R10-R14 are guaranteed to be preserved \
+ across SH5 function calls. */ \
+ 0, 0, 0, 0, 0, 0, 0, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
+/* FP registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+/* Branch target registers. */ \
+ 1, 1, 1, 1, 1, 0, 0, 0, \
+/* XD registers. */ \
+ 1, 1, 1, 1, 1, 1, 0, 0, \
+/*"gbr", "ap", "pr", "t", "mach", "macl", "fpul", "fpscr", */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+/*"rap", "sfp" */ \
+ 1, 1, \
+}
+
+/* TARGET_CONDITIONAL_REGISTER_USAGE might want to make a register
+ call-used, yet fixed, like PIC_OFFSET_TABLE_REGNUM. */
+#define CALL_REALLY_USED_REGISTERS CALL_USED_REGISTERS
+
+/* Only the lower 32-bits of R10-R14 are guaranteed to be preserved
+ across SHcompact function calls. We can't tell whether a called
+ function is SHmedia or SHcompact, so we assume it may be when
+ compiling SHmedia code with the 32-bit ABI, since that's the only
+ ABI that can be linked with SHcompact code. */
+#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO,MODE) \
+ (TARGET_SHMEDIA32 \
+ && GET_MODE_SIZE (MODE) > 4 \
+ && (((REGNO) >= FIRST_GENERAL_REG + 10 \
+ && (REGNO) <= FIRST_GENERAL_REG + 15) \
+ || TARGET_REGISTER_P (REGNO) \
+ || (REGNO) == PR_MEDIA_REG))
+
+/* Return number of consecutive hard regs needed starting at reg REGNO
+ to hold something of mode MODE.
+ This is ordinarily the length in words of a value of mode MODE
+ but can be less for certain modes in special long registers.
+
+ On the SH all but the XD regs are UNITS_PER_WORD bits wide. */
+#define HARD_REGNO_NREGS(REGNO, MODE) \
+ (XD_REGISTER_P (REGNO) \
+ ? ((GET_MODE_SIZE (MODE) + (2*UNITS_PER_WORD - 1)) / (2*UNITS_PER_WORD)) \
+ : (TARGET_SHMEDIA && FP_REGISTER_P (REGNO)) \
+ ? ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD/2 - 1) / (UNITS_PER_WORD/2)) \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
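+
+/* Illustrative note (not part of the original header): for a DFmode value
+   (8 bytes) on SH4, where UNITS_PER_WORD is 4, HARD_REGNO_NREGS yields
+   (8 + 7) / 8 == 1 register when REGNO is an XD register (each XD register
+   is a double-precision pair), but (8 + 3) / 4 == 2 registers for an
+   ordinary general or FP register.  */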
+
+/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
+#define HARD_REGNO_MODE_OK(REGNO, MODE) \
+ sh_hard_regno_mode_ok ((REGNO), (MODE))
+
+/* Value is 1 if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be 0 for correct output.
+ That's the case for xd registers: we don't hold SFmode values in
+   them, so we can't tie an SFmode pseudo with one in another
+ floating-point mode. */
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+ ((MODE1) == (MODE2) \
+ || (TARGET_SHMEDIA \
+ && GET_MODE_SIZE (MODE1) == GET_MODE_SIZE (MODE2) \
+ && INTEGRAL_MODE_P (MODE1) && INTEGRAL_MODE_P (MODE2)) \
+ || (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2) \
+ && (TARGET_SHMEDIA ? ((GET_MODE_SIZE (MODE1) <= 4) \
+ && (GET_MODE_SIZE (MODE2) <= 4)) \
+ : ((MODE1) != SFmode && (MODE2) != SFmode))))
+
+/* A C expression that is nonzero if hard register NEW_REG can be
+   considered for use as a rename register for the OLD_REG register.  */
+#define HARD_REGNO_RENAME_OK(OLD_REG, NEW_REG) \
+ sh_hard_regno_rename_ok (OLD_REG, NEW_REG)
+
+/* Specify the registers used for certain standard purposes.
+ The values of these macros are register numbers. */
+
+/* Define this if the program counter is overloaded on a register. */
+/* #define PC_REGNUM 15*/
+
+/* Register to use for pushing function arguments. */
+#define STACK_POINTER_REGNUM SP_REG
+
+/* Base register for access to local variables of the function. */
+#define HARD_FRAME_POINTER_REGNUM FP_REG
+
+/* Fake register that serves as the soft frame pointer; it is always
+   eliminated in favor of the hard frame pointer or the stack pointer. */
+#define FRAME_POINTER_REGNUM 153
+
+/* Fake register that holds the address on the stack of the
+ current function's return address. */
+#define RETURN_ADDRESS_POINTER_REGNUM RAP_REG
+
+/* Register to hold the addressing base for position independent
+ code access to data items. */
+#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? PIC_REG : INVALID_REGNUM)
+
+#define GOT_SYMBOL_NAME "*_GLOBAL_OFFSET_TABLE_"
+
+/* Definitions for register eliminations.
+
+ We have three registers that can be eliminated on the SH. First, the
+ frame pointer register can often be eliminated in favor of the stack
+ pointer register. Secondly, the argument pointer register can always be
+ eliminated; it is replaced with either the stack or frame pointer.
+ Third, there is the return address pointer, which can also be replaced
+ with either the stack or the frame pointer.
+
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
+
+ If you add any registers here that are not actually hard registers,
+ and that have any alternative of elimination that doesn't always
+ apply, you need to amend calc_live_regs to exclude it, because
+   reload spills all eliminable registers where it sees a
+   can_eliminate == 0 entry, thus making them 'live'.
+ If you add any hard registers that can be eliminated in different
+ ways, you have to patch reload to spill them only when all alternatives
+ of elimination fail. */
+#define ELIMINABLE_REGS \
+{{ HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { RETURN_ADDRESS_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM},}
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = initial_elimination_offset ((FROM), (TO))
+
+/* Base register for access to arguments of the function. */
+#define ARG_POINTER_REGNUM AP_REG
+
+/* Register in which the static-chain is passed to a function. */
+#define STATIC_CHAIN_REGNUM (TARGET_SH5 ? 1 : 3)
+
+/* Don't default to pcc-struct-return, because we have already specified
+ exactly how to return structures in the TARGET_RETURN_IN_MEMORY
+ target hook. */
+#define DEFAULT_PCC_STRUCT_RETURN 0
+
+#define SHMEDIA_REGS_STACK_ADJUST() \
+ (TARGET_SHCOMPACT && crtl->saves_all_registers \
+ ? (8 * (/* r28-r35 */ 8 + /* r44-r59 */ 16 + /* tr5-tr7 */ 3) \
+ + (TARGET_FPU_ANY ? 4 * (/* fr36 - fr63 */ 28) : 0)) \
+ : 0)
+
+
+/* Define the classes of registers for register constraints in the
+ machine description. Also define ranges of constants.
+
+ One of the classes must always be named ALL_REGS and include all hard regs.
+ If there is more than one class, another class must be named NO_REGS
+ and contain no registers.
+
+ The name GENERAL_REGS must be the name of a class (or an alias for
+ another name such as ALL_REGS). This is the class of registers
+ that is allowed by "g" or "r" in a register constraint.
+ Also, registers outside this class are allocated only when
+ instructions express preferences for them.
+
+ The classes must be numbered in nondecreasing order; that is,
+ a larger-numbered class must never be contained completely
+ in a smaller-numbered class.
+
+ For any two classes, it is very desirable that there be another
+ class that represents their union.
+
+ The SH has two sorts of general registers, R0 and the rest. R0 can
+ be used as the destination of some of the arithmetic ops. There are
+ also some special purpose registers; the T bit register, the
+ Procedure Return Register and the Multiply Accumulate Registers.
+
+ Place GENERAL_REGS after FPUL_REGS so that it will be preferred by
+ reg_class_subunion. We don't want to have an actual union class
+ of these, because it would only be used when both classes are calculated
+ to give the same cost, but there is only one FPUL register.
+ Besides, regclass fails to notice the different REGISTER_MOVE_COSTS
+ applying to the actual instruction alternative considered. E.g., the
+   y/r alternative of movsi_ie is considered to have no more cost than
+ the r/r alternative, which is patently untrue. */
+enum reg_class
+{
+ NO_REGS,
+ R0_REGS,
+ PR_REGS,
+ T_REGS,
+ MAC_REGS,
+ FPUL_REGS,
+ SIBCALL_REGS,
+ NON_SP_REGS,
+ GENERAL_REGS,
+ FP0_REGS,
+ FP_REGS,
+ DF_REGS,
+ FPSCR_REGS,
+ GENERAL_FP_REGS,
+ GENERAL_DF_REGS,
+ TARGET_REGS,
+ ALL_REGS,
+ LIM_REG_CLASSES
+};
+
+#define N_REG_CLASSES (int) LIM_REG_CLASSES
+
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "R0_REGS", \
+ "PR_REGS", \
+ "T_REGS", \
+ "MAC_REGS", \
+ "FPUL_REGS", \
+ "SIBCALL_REGS", \
+ "NON_SP_REGS", \
+ "GENERAL_REGS", \
+ "FP0_REGS", \
+ "FP_REGS", \
+ "DF_REGS", \
+ "FPSCR_REGS", \
+ "GENERAL_FP_REGS", \
+ "GENERAL_DF_REGS", \
+ "TARGET_REGS", \
+ "ALL_REGS", \
+}
+
+/* Define which registers fit in which classes.
+ This is an initializer for a vector of HARD_REG_SET
+ of length N_REG_CLASSES. */
+#define REG_CLASS_CONTENTS \
+{ \
+/* NO_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* R0_REGS: */ \
+ { 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* PR_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, \
+/* T_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00080000 }, \
+/* MAC_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00300000 }, \
+/* FPUL_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00400000 }, \
+/* SIBCALL_REGS: Initialized in TARGET_CONDITIONAL_REGISTER_USAGE. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+/* NON_SP_REGS: */ \
+ { 0xffff7fff, 0xffffffff, 0x00000000, 0x00000000, 0x03020000 }, \
+/* GENERAL_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x03020000 }, \
+/* FP0_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x00000000 }, \
+/* FP_REGS: */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 }, \
+/* DF_REGS: */ \
+ { 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x0000ff00 }, \
+/* FPSCR_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00800000 }, \
+/* GENERAL_FP_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x03020000 }, \
+/* GENERAL_DF_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0302ff00 }, \
+/* TARGET_REGS: */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000ff }, \
+/* ALL_REGS: */ \
+ { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x03ffffff }, \
+}
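+
+/* Illustrative note (not part of the original header): each row above is a
+   bitmap over hard register numbers, 32 registers per word.  For example,
+   in T_REGS the fifth word is 0x00080000; bit 19 of that word corresponds
+   to hard register 128 + 19 == 147, which is the T bit register in
+   SH_REGISTER_NAMES_INITIALIZER.  Likewise MAC_REGS (0x00300000) covers
+   registers 148 and 149, i.e. mach and macl.  */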
+
+/* The same information, inverted:
+ Return the class number of the smallest class containing
+ reg number REGNO. This could be a conditional expression
+ or could index an array. */
+extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
+#define REGNO_REG_CLASS(REGNO) regno_reg_class[(REGNO)]
+
+/* When this hook returns true for MODE, the compiler allows
+ registers explicitly used in the rtl to be used as spill registers
+ but prevents the compiler from extending the lifetime of these
+ registers. */
+#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
+ sh_small_register_classes_for_mode_p
+
+/* The order in which register should be allocated. */
+/* Sometimes FP0_REGS becomes the preferred class of a floating point pseudo,
+ and GENERAL_FP_REGS the alternate class. Since FP0 is likely to be
+   spilled or otherwise used, we had better have the FP_REGS allocated first. */
+#define REG_ALLOC_ORDER \
+ {/* Caller-saved FPRs */ \
+ 65, 66, 67, 68, 69, 70, 71, 64, \
+ 72, 73, 74, 75, 80, 81, 82, 83, \
+ 84, 85, 86, 87, 88, 89, 90, 91, \
+ 92, 93, 94, 95, 96, 97, 98, 99, \
+ /* Callee-saved FPRs */ \
+ 76, 77, 78, 79,100,101,102,103, \
+ 104,105,106,107,108,109,110,111, \
+ 112,113,114,115,116,117,118,119, \
+ 120,121,122,123,124,125,126,127, \
+ 136,137,138,139,140,141,142,143, \
+ /* FPSCR */ 151, \
+ /* Caller-saved GPRs (except 8/9 on SH1-4) */ \
+ 1, 2, 3, 7, 6, 5, 4, 0, \
+ 8, 9, 17, 19, 20, 21, 22, 23, \
+ 36, 37, 38, 39, 40, 41, 42, 43, \
+ 60, 61, 62, \
+   /* SH1-4 callee-saved GPRs / SH5 partially-saved GPRs */ \
+ 10, 11, 12, 13, 14, 18, \
+ /* SH5 callee-saved GPRs */ \
+ 28, 29, 30, 31, 32, 33, 34, 35, \
+ 44, 45, 46, 47, 48, 49, 50, 51, \
+ 52, 53, 54, 55, 56, 57, 58, 59, \
+ /* FPUL */ 150, \
+ /* SH5 branch target registers */ \
+ 128,129,130,131,132,133,134,135, \
+ /* Fixed registers */ \
+ 15, 16, 24, 25, 26, 27, 63,144, \
+ 145,146,147,148,149,152,153 }
+
+/* The class value for index registers, and the one for base regs. */
+#define INDEX_REG_CLASS \
+ (!ALLOW_INDEXED_ADDRESS ? NO_REGS : TARGET_SHMEDIA ? GENERAL_REGS : R0_REGS)
+#define BASE_REG_CLASS GENERAL_REGS
+
+/* Defines for sh.md and constraints.md. */
+
+#define CONST_OK_FOR_I08(VALUE) (((HOST_WIDE_INT)(VALUE))>= -128 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 127)
+#define CONST_OK_FOR_I16(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -32768 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 32767)
+
+#define CONST_OK_FOR_J16(VALUE) \
+ ((HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) 0xffffffff) \
+ || (HOST_BITS_PER_WIDE_INT >= 64 && (VALUE) == (HOST_WIDE_INT) -1 << 32))
+
+#define CONST_OK_FOR_K08(VALUE) (((HOST_WIDE_INT)(VALUE))>= 0 \
+ && ((HOST_WIDE_INT)(VALUE)) <= 255)
+
+#define ZERO_EXTRACT_ANDMASK(EXTRACT_SZ_RTX, EXTRACT_POS_RTX)\
+ (((1 << INTVAL (EXTRACT_SZ_RTX)) - 1) << INTVAL (EXTRACT_POS_RTX))
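+
+/* Illustrative note (not part of the original header): for a zero_extract
+   of 3 bits starting at bit position 4, ZERO_EXTRACT_ANDMASK computes
+   ((1 << 3) - 1) << 4 == 0x7 << 4 == 0x70, i.e. the AND mask selecting
+   exactly those bits.  Both arguments are assumed to be CONST_INT rtxes,
+   as the INTVAL accesses require.  */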
+
+/* Return the maximum number of consecutive registers
+ needed to represent mode MODE in a register of class CLASS.
+
+ If TARGET_SHMEDIA, we need two FP registers per word.
+ Otherwise we will need at most one register per word. */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+ (TARGET_SHMEDIA \
+ && TEST_HARD_REG_BIT (reg_class_contents[CLASS], FIRST_FP_REG) \
+ ? (GET_MODE_SIZE (MODE) + UNITS_PER_WORD/2 - 1) / (UNITS_PER_WORD/2) \
+ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* If defined, gives a class of registers that cannot be used as the
+ operand of a SUBREG that changes the mode of the object illegally.
+ ??? We need to renumber the internal numbers for the frnn registers
+ when in little endian in order to allow mode size changes. */
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+ sh_cannot_change_mode_class (FROM, TO, CLASS)
+
+/* Stack layout; function entry, exit and calling. */
+
+/* Define the number of registers that can hold parameters.
+ These macros are used only in other macro definitions below. */
+#define NPARM_REGS(MODE) \
+ (TARGET_FPU_ANY && (MODE) == SFmode \
+ ? (TARGET_SH5 ? 12 : 8) \
+ : (TARGET_SH4 || TARGET_SH2A_DOUBLE) \
+ && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+ ? (TARGET_SH5 ? 12 : 8) \
+ : (TARGET_SH5 ? 8 : 4))
+
+#define FIRST_PARM_REG (FIRST_GENERAL_REG + (TARGET_SH5 ? 2 : 4))
+#define FIRST_RET_REG (FIRST_GENERAL_REG + (TARGET_SH5 ? 2 : 0))
+
+#define FIRST_FP_PARM_REG (FIRST_FP_REG + (TARGET_SH5 ? 0 : 4))
+#define FIRST_FP_RET_REG FIRST_FP_REG
+
+/* Define this if pushing a word on the stack
+ makes the stack pointer a smaller address. */
+#define STACK_GROWS_DOWNWARD
+
+/* Define this macro to nonzero if the addresses of local variable slots
+ are at negative offsets from the frame pointer. */
+#define FRAME_GROWS_DOWNWARD 1
+
+/* Offset from the frame pointer to the first local variable slot to
+ be allocated. */
+#define STARTING_FRAME_OFFSET 0
+
+/* If we generate an insn to push BYTES bytes,
+ this says how many the stack pointer really advances by. */
+/* Don't define PUSH_ROUNDING, since the hardware doesn't do this.
+ When PUSH_ROUNDING is not defined, PARM_BOUNDARY will cause gcc to
+ do correct alignment. */
+#if 0
+#define PUSH_ROUNDING(NPUSHED) (((NPUSHED) + 3) & ~3)
+#endif
+
+/* Offset of first parameter from the argument pointer register value. */
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+/* Value is the number of bytes of arguments automatically popped when
+ calling a subroutine.
+ CUM is the accumulated argument list.
+
+ On SHcompact, the call trampoline pops arguments off the stack. */
+#define CALL_POPS_ARGS(CUM) (TARGET_SHCOMPACT ? (CUM).stack_regs * 8 : 0)
+
+/* Some subroutine macros specific to this machine. */
+
+#define BASE_RETURN_VALUE_REG(MODE) \
+ ((TARGET_FPU_ANY && ((MODE) == SFmode)) \
+ ? FIRST_FP_RET_REG \
+ : TARGET_FPU_ANY && (MODE) == SCmode \
+ ? FIRST_FP_RET_REG \
+ : (TARGET_FPU_DOUBLE \
+ && ((MODE) == DFmode || (MODE) == SFmode \
+ || (MODE) == DCmode || (MODE) == SCmode )) \
+ ? FIRST_FP_RET_REG \
+ : FIRST_RET_REG)
+
+#define BASE_ARG_REG(MODE) \
+ ((TARGET_SH2E && ((MODE) == SFmode)) \
+ ? FIRST_FP_PARM_REG \
+ : (TARGET_SH4 || TARGET_SH2A_DOUBLE) && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\
+ ? FIRST_FP_PARM_REG \
+ : FIRST_PARM_REG)
+
+/* 1 if N is a possible register number for function argument passing. */
+/* ??? There are some callers that pass REGNO as int, and others that pass
+ it as unsigned. We get warnings unless we do casts everywhere. */
+#define FUNCTION_ARG_REGNO_P(REGNO) \
+ (((unsigned) (REGNO) >= (unsigned) FIRST_PARM_REG \
+ && (unsigned) (REGNO) < (unsigned) (FIRST_PARM_REG + NPARM_REGS (SImode)))\
+ || (TARGET_FPU_ANY \
+ && (unsigned) (REGNO) >= (unsigned) FIRST_FP_PARM_REG \
+ && (unsigned) (REGNO) < (unsigned) (FIRST_FP_PARM_REG \
+ + NPARM_REGS (SFmode))))
+
+/* Define a data type for recording info about an argument list
+ during the scan of that argument list. This data type should
+ hold all necessary information about the function itself
+ and about the args processed so far, enough to enable macros
+ such as FUNCTION_ARG to determine where the next arg should go.
+
+ On SH, this is a single integer, which is a number of words
+ of arguments scanned so far (including the invisible argument,
+ if any, which holds the structure-value-address).
+   Thus NPARM_REGS or more means all following args should go on the stack. */
+enum sh_arg_class { SH_ARG_INT = 0, SH_ARG_FLOAT = 1 };
+struct sh_args {
+ int arg_count[2];
+ int force_mem;
+ /* Nonzero if a prototype is available for the function. */
+ int prototype_p;
+  /* The number of an odd floating-point register that should be used
+ for the next argument of type float. */
+ int free_single_fp_reg;
+ /* Whether we're processing an outgoing function call. */
+ int outgoing;
+ /* The number of general-purpose registers that should have been
+ used to pass partial arguments, that are passed totally on the
+ stack. On SHcompact, a call trampoline will pop them off the
+ stack before calling the actual function, and, if the called
+ function is implemented in SHcompact mode, the incoming arguments
+ decoder will push such arguments back onto the stack. For
+ incoming arguments, STACK_REGS also takes into account other
+ arguments passed by reference, that the decoder will also push
+ onto the stack. */
+ int stack_regs;
+ /* The number of general-purpose registers that should have been
+ used to pass arguments, if the arguments didn't have to be passed
+ by reference. */
+ int byref_regs;
+  /* Set by shcompact_byref if the current argument is to be passed
+ by reference. */
+ int byref;
+
+ /* call_cookie is a bitmask used by call expanders, as well as
+ function prologue and epilogues, to allow SHcompact to comply
+ with the SH5 32-bit ABI, that requires 64-bit registers to be
+ used even though only the lower 32-bit half is visible in
+ SHcompact mode. The strategy is to call SHmedia trampolines.
+
+ The alternatives for each of the argument-passing registers are
+ (a) leave it unchanged; (b) pop it off the stack; (c) load its
+ contents from the address in it; (d) add 8 to it, storing the
+ result in the next register, then (c); (e) copy it from some
+     floating-point register.
+
+ Regarding copies from floating-point registers, r2 may only be
+     copied from dr0. r3 may be copied from dr0 or dr2. r4 may be
+     copied from dr0, dr2 or dr4. r5 may be copied from dr0, dr2,
+     dr4 or dr6. r6 may be copied from dr0, dr2, dr4, dr6 or dr8.
+     r7 through r9 may be copied from dr0, dr2, dr4, dr6, dr8 or
+ dr10.
+
+ The bit mask is structured as follows:
+
+ - 1 bit to tell whether to set up a return trampoline.
+
+     - 3 bits to count the number of consecutive registers to pop off the
+ stack.
+
+ - 4 bits for each of r9, r8, r7 and r6.
+
+ - 3 bits for each of r5, r4, r3 and r2.
+
+ - 3 bits set to 0 (the most significant ones)
+
+ 3 2 1 0
+ 1098 7654 3210 9876 5432 1098 7654 3210
+ FLPF LPFL PFLP FFLP FFLP FFLP FFLP SSST
+ 2223 3344 4555 6666 7777 8888 9999 SSS-
+
+ - If F is set, the register must be copied from an FP register,
+ whose number is encoded in the remaining bits.
+
+ - Else, if L is set, the register must be loaded from the address
+ contained in it. If the P bit is *not* set, the address of the
+ following dword should be computed first, and stored in the
+ following register.
+
+ - Else, if P is set, the register alone should be popped off the
+ stack.
+
+ - After all this processing, the number of registers represented
+ in SSS will be popped off the stack. This is an optimization
+ for pushing/popping consecutive registers, typically used for
+ varargs and large arguments partially passed in registers.
+
+ - If T is set, a return trampoline will be set up for 64-bit
+ return values to be split into 2 32-bit registers. */
+ long call_cookie;
+
+ /* This is set to nonzero when the call in question must use the Renesas ABI,
+ even without the -mrenesas option. */
+ int renesas_abi;
+};
+
+#define CALL_COOKIE_RET_TRAMP_SHIFT 0
+#define CALL_COOKIE_RET_TRAMP(VAL) ((VAL) << CALL_COOKIE_RET_TRAMP_SHIFT)
+#define CALL_COOKIE_STACKSEQ_SHIFT 1
+#define CALL_COOKIE_STACKSEQ(VAL) ((VAL) << CALL_COOKIE_STACKSEQ_SHIFT)
+#define CALL_COOKIE_STACKSEQ_GET(COOKIE) \
+ (((COOKIE) >> CALL_COOKIE_STACKSEQ_SHIFT) & 7)
+#define CALL_COOKIE_INT_REG_SHIFT(REG) \
+ (4 * (7 - (REG)) + (((REG) <= 2) ? ((REG) - 2) : 1) + 3)
+#define CALL_COOKIE_INT_REG(REG, VAL) \
+ ((VAL) << CALL_COOKIE_INT_REG_SHIFT (REG))
+#define CALL_COOKIE_INT_REG_GET(COOKIE, REG) \
+ (((COOKIE) >> CALL_COOKIE_INT_REG_SHIFT (REG)) & ((REG) < 4 ? 7 : 15))
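+
+/* Illustrative note (not part of the original header): here REG is assumed
+   to be the argument-register index counting from r2, matching the bit
+   layout in the comment above.  For example CALL_COOKIE_INT_REG_SHIFT (2)
+   (i.e. r4) is 4 * (7 - 2) + 0 + 3 == 23, the LSB of the 3-bit r4 field,
+   and CALL_COOKIE_INT_REG_SHIFT (7) (i.e. r9) is 4, the LSB of the 4-bit
+   r9 field.  CALL_COOKIE_INT_REG_GET accordingly masks with 7 for REG < 4
+   and with 15 for the wider fields.  */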
+
+#define CUMULATIVE_ARGS struct sh_args
+
+#define GET_SH_ARG_CLASS(MODE) \
+ ((TARGET_FPU_ANY && (MODE) == SFmode) \
+ ? SH_ARG_FLOAT \
+ /* There's no mention of complex float types in the SH5 ABI, so we
+ should presumably handle them as aggregate types. */ \
+ : TARGET_SH5 && GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT \
+ ? SH_ARG_INT \
+ : TARGET_FPU_DOUBLE && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
+ ? SH_ARG_FLOAT : SH_ARG_INT)
+
+#define ROUND_ADVANCE(SIZE) \
+ (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+
+/* Round a register number up to a proper boundary for an arg of mode
+ MODE.
+
+ The SH doesn't care about double alignment, so we only
+   round doubles to even regs when explicitly asked to. */
+#define ROUND_REG(CUM, MODE) \
+ (((TARGET_ALIGN_DOUBLE \
+ || ((TARGET_SH4 || TARGET_SH2A_DOUBLE) \
+ && ((MODE) == DFmode || (MODE) == DCmode) \
+ && (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE))) \
+ && GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \
+ ? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
+ + ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \
+ : (CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)])
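+
+/* Illustrative note (not part of the original header): on SH4, if three
+   single-precision argument slots are already in use
+   ((CUM).arg_count[SH_ARG_FLOAT] == 3) and the next argument is DFmode,
+   ROUND_REG returns 3 + 1 == 4, so the double starts on an even register
+   pair rather than straddling one.  For modes no wider than a word the
+   count is returned unchanged.  */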
+
+/* Initialize a variable CUM of type CUMULATIVE_ARGS
+ for a call to a function whose data type is FNTYPE.
+ For a library call, FNTYPE is 0.
+
+ On SH, the offset always starts at 0: the first parm reg is always
+ the same reg for a given argument class.
+
+ For TARGET_HITACHI, the structure value pointer is passed in memory. */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
+ sh_init_cumulative_args (& (CUM), (FNTYPE), (LIBNAME), (FNDECL),\
+ (N_NAMED_ARGS), VOIDmode)
+
+#define INIT_CUMULATIVE_LIBCALL_ARGS(CUM, MODE, LIBNAME) \
+ sh_init_cumulative_args (& (CUM), NULL_TREE, (LIBNAME), NULL_TREE, 0, (MODE))
+
+/* Return boolean indicating arg of mode MODE will be passed in a reg.
+ This macro is only used in this file. */
+#define PASS_IN_REG_P(CUM, MODE, TYPE) \
+ (((TYPE) == 0 \
+ || (! TREE_ADDRESSABLE ((TYPE)) \
+ && (! (TARGET_HITACHI || (CUM).renesas_abi) \
+ || ! (AGGREGATE_TYPE_P (TYPE) \
+ || (!TARGET_FPU_ANY \
+ && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
+ && GET_MODE_SIZE (MODE) > GET_MODE_SIZE (SFmode))))))) \
+ && ! (CUM).force_mem \
+ && (TARGET_SH2E \
+ ? ((MODE) == BLKmode \
+ ? (((CUM).arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD \
+ + int_size_in_bytes (TYPE)) \
+ <= NPARM_REGS (SImode) * UNITS_PER_WORD) \
+ : ((ROUND_REG((CUM), (MODE)) \
+ + HARD_REGNO_NREGS (BASE_ARG_REG (MODE), (MODE))) \
+ <= NPARM_REGS (MODE))) \
+ : ROUND_REG ((CUM), (MODE)) < NPARM_REGS (MODE)))
+
+/* By accident we got stuck with passing SCmode on SH4 little endian
+ in two registers that are nominally successive - which is different from
+ two single SFmode values, where we take endianness translation into
+ account. That does not work at all if an odd number of registers is
+ already in use, so that got fixed, but library functions are still more
+ likely to use complex numbers without mixing them with SFmode arguments
+ (which in C would have to be structures), so for the sake of ABI
+ compatibility the way SCmode values are passed when an even number of
+ FP registers is in use remains different from a pair of SFmode values for
+ now.
+ I.e.:
+ foo (double); a: fr5,fr4
+ foo (float a, float b); a: fr5 b: fr4
+ foo (__complex float a); a.real fr4 a.imag: fr5 - for consistency,
+ this should be the other way round...
+ foo (float a, __complex float b); a: fr5 b.real: fr4 b.imag: fr7 */
+#define FUNCTION_ARG_SCmode_WART 1
+
+/* If an argument of size 5, 6 or 7 bytes is to be passed in a 64-bit
+ register in SHcompact mode, it must be padded in the most
+ significant end. This means that passing it by reference wouldn't
+ pad properly on a big-endian machine. In this particular case, we
+ pass this argument on the stack, in a way that the call trampoline
+ will load its value into the appropriate register. */
+#define SHCOMPACT_FORCE_ON_STACK(MODE,TYPE) \
+ ((MODE) == BLKmode \
+ && TARGET_SHCOMPACT \
+ && TARGET_BIG_ENDIAN \
+ && int_size_in_bytes (TYPE) > 4 \
+ && int_size_in_bytes (TYPE) < 8)
+
+/* Minimum alignment for an argument to be passed by callee-copy
+ reference. We need such arguments to be aligned to 8 byte
+ boundaries, because they'll be loaded using quad loads. */
+#define SH_MIN_ALIGN_FOR_CALLEE_COPY (8 * BITS_PER_UNIT)
+
+/* The SH5 ABI requires floating-point arguments to be passed to
+ functions without a prototype in both an FP register and a regular
+ register or the stack. When passing the argument in both FP and
+ general-purpose registers, list the FP register first. */
+#define SH5_PROTOTYPELESS_FLOAT_ARG(CUM,MODE) \
+ (gen_rtx_PARALLEL \
+ ((MODE), \
+ gen_rtvec (2, \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, \
+ ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+ ? gen_rtx_REG ((MODE), FIRST_FP_PARM_REG \
+ + (CUM).arg_count[(int) SH_ARG_FLOAT]) \
+ : NULL_RTX), \
+ const0_rtx), \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, \
+ ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+ ? gen_rtx_REG ((MODE), FIRST_PARM_REG \
+ + (CUM).arg_count[(int) SH_ARG_INT]) \
+ : gen_rtx_REG ((MODE), FIRST_FP_PARM_REG \
+ + (CUM).arg_count[(int) SH_ARG_FLOAT])), \
+ const0_rtx))))
+
+/* The SH5 ABI requires regular registers or stack slots to be
+ reserved for floating-point arguments. Registers are taken care of
+ in FUNCTION_ARG_ADVANCE, but stack slots must be reserved here.
+ Unfortunately, there's no way to just reserve a stack slot, so
+ we'll end up needlessly storing a copy of the argument in the
+ stack. For incoming arguments, however, the PARALLEL will be
+ optimized to the register-only form, and the value in the stack
+ slot won't be used at all. */
+#define SH5_PROTOTYPED_FLOAT_ARG(CUM,MODE,REG) \
+ ((CUM).arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode) \
+ ? gen_rtx_REG ((MODE), (REG)) \
+ : gen_rtx_PARALLEL ((MODE), \
+ gen_rtvec (2, \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, NULL_RTX, \
+ const0_rtx), \
+ gen_rtx_EXPR_LIST \
+ (VOIDmode, gen_rtx_REG ((MODE), \
+ (REG)), \
+ const0_rtx))))
+
+#define SH5_WOULD_BE_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \
+ (TARGET_SH5 \
+ && ((MODE) == BLKmode || (MODE) == TImode || (MODE) == CDImode \
+ || (MODE) == DCmode) \
+ && ((CUM).arg_count[(int) SH_ARG_INT] \
+ + (((MODE) == BLKmode ? int_size_in_bytes (TYPE) \
+ : GET_MODE_SIZE (MODE)) \
+ + 7) / 8) > NPARM_REGS (SImode))
+
+/* Perform any actions needed for a function that is receiving a
+ variable number of arguments. */
+
+/* Call the function profiler with a given profile label.
+   We use two .aligns, so as to make sure both that the .long is aligned
+   on a 4 byte boundary and that it is a fixed distance (2 bytes)
+ from the trapa instruction. */
+#define FUNCTION_PROFILER(STREAM,LABELNO) \
+{ \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf((STREAM), "\tmovi\t33,r0\n"); \
+ fprintf((STREAM), "\ttrapa\tr0\n"); \
+ asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \
+ } \
+ else \
+ { \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ fprintf((STREAM), "\ttrapa\t#33\n"); \
+ fprintf((STREAM), "\t.align\t2\n"); \
+ asm_fprintf((STREAM), "\t.long\t%LLP%d\n", (LABELNO)); \
+ } \
+}
+
+/* Define this macro if the code for function profiling should come
+ before the function prologue. Normally, the profiling code comes
+ after. */
+#define PROFILE_BEFORE_PROLOGUE
+
+/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
+ the stack pointer does not matter. The value is tested only in
+ functions that have frame pointers.
+ No definition is equivalent to always zero. */
+#define EXIT_IGNORE_STACK 1
+
+/*
+ On the SH, the trampoline looks like
+ 2 0002 D202 mov.l l2,r2
+ 1 0000 D301 mov.l l1,r3
+ 3 0004 422B jmp @r2
+ 4 0006 0009 nop
+ 5 0008 00000000 l1: .long area
+ 6 000c 00000000 l2: .long function */
+
+/* Length in units of the trampoline for entering a nested function. */
+#define TRAMPOLINE_SIZE (TARGET_SHMEDIA64 ? 40 : TARGET_SH5 ? 24 : 16)
+
+/* Alignment required for a trampoline in bits. */
+#define TRAMPOLINE_ALIGNMENT \
+ ((CACHE_LOG < 3 \
+ || (optimize_size && ! (TARGET_HARD_SH4 || TARGET_SH5))) ? 32 \
+ : TARGET_SHMEDIA ? 256 : 64)
+
+/* A C expression whose value is RTL representing the value of the return
+ address for the frame COUNT steps up from the current frame.
+ FRAMEADDR is already the frame pointer of the COUNT frame, so we
+ can ignore COUNT. */
+#define RETURN_ADDR_RTX(COUNT, FRAME) \
+ (((COUNT) == 0) ? sh_get_pr_initial_val () : NULL_RTX)
+
+/* A C expression whose value is RTL representing the location of the
+ incoming return address at the beginning of any function, before the
+ prologue. This RTL is either a REG, indicating that the return
+ value is saved in REG, or a MEM representing a location in
+ the stack. */
+#define INCOMING_RETURN_ADDR_RTX \
+ gen_rtx_REG (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
+
+/* Addressing modes, and classification of registers for them. */
+#define HAVE_POST_INCREMENT TARGET_SH1
+#define HAVE_PRE_DECREMENT TARGET_SH1
+
+#define USE_LOAD_POST_INCREMENT(mode) ((mode == SImode || mode == DImode) \
+ ? 0 : TARGET_SH1)
+#define USE_LOAD_PRE_DECREMENT(mode) 0
+#define USE_STORE_POST_INCREMENT(mode) 0
+#define USE_STORE_PRE_DECREMENT(mode) ((mode == SImode || mode == DImode) \
+ ? 0 : TARGET_SH1)
+
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+ (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+ < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
+
+#define STORE_BY_PIECES_P(SIZE, ALIGN) \
+ (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \
+ < (optimize_size ? 2 : ((ALIGN >= 32) ? 16 : 2)))
+
+#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN)
+
+/* Macros to check register numbers against specific register classes. */
+
+/* These assume that REGNO is a hard or pseudo reg number.
+ They give nonzero only if REGNO is a hard reg of the suitable class
+ or a pseudo reg currently allocated to a suitable hard reg.
+ Since they use reg_renumber, they are safe only once reg_renumber
+ has been allocated, which happens in reginfo.c during register
+ allocation. */
+#define REGNO_OK_FOR_BASE_P(REGNO) \
+ (GENERAL_OR_AP_REGISTER_P (REGNO) \
+ || GENERAL_OR_AP_REGISTER_P (reg_renumber[(REGNO)]))
+#define REGNO_OK_FOR_INDEX_P(REGNO) \
+ (TARGET_SHMEDIA \
+ ? (GENERAL_REGISTER_P (REGNO) \
+ || GENERAL_REGISTER_P ((unsigned) reg_renumber[(REGNO)])) \
+ : (REGNO) == R0_REG || (unsigned) reg_renumber[(REGNO)] == R0_REG)
+
+/* Maximum number of registers that can appear in a valid memory
+ address. */
+#define MAX_REGS_PER_ADDRESS 2
+
+/* Recognize any constant value that is a valid address. */
+#define CONSTANT_ADDRESS_P(X) (GET_CODE (X) == LABEL_REF)
+
+/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
+ and check its validity for a certain class.
+ The suitable hard regs are always accepted and all pseudo regs
+ are also accepted if STRICT is not set. */
+
+/* Nonzero if X is a reg that can be used as a base reg. */
+#define REG_OK_FOR_BASE_P(X, STRICT) \
+ (GENERAL_OR_AP_REGISTER_P (REGNO (X)) \
+ || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Nonzero if X is a reg that can be used as an index. */
+#define REG_OK_FOR_INDEX_P(X, STRICT) \
+ ((TARGET_SHMEDIA ? GENERAL_REGISTER_P (REGNO (X)) \
+ : REGNO (X) == R0_REG) \
+ || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Nonzero if X/OFFSET is a reg that can be used as an index. */
+#define SUBREG_OK_FOR_INDEX_P(X, OFFSET, STRICT) \
+ ((TARGET_SHMEDIA ? GENERAL_REGISTER_P (REGNO (X)) \
+ : REGNO (X) == R0_REG && OFFSET == 0) \
+ || (!STRICT && REGNO (X) >= FIRST_PSEUDO_REGISTER))
+
+/* Macros for extra constraints. */
+
+#define IS_PC_RELATIVE_LOAD_ADDR_P(OP) \
+ ((GET_CODE ((OP)) == LABEL_REF) \
+ || (GET_CODE ((OP)) == CONST \
+ && GET_CODE (XEXP ((OP), 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP ((OP), 0), 0)) == LABEL_REF \
+ && CONST_INT_P (XEXP (XEXP ((OP), 0), 1))))
+
+#define IS_NON_EXPLICIT_CONSTANT_P(OP) \
+ (CONSTANT_P (OP) \
+ && !CONST_INT_P (OP) \
+ && GET_CODE (OP) != CONST_DOUBLE \
+ && (!flag_pic \
+ || (LEGITIMATE_PIC_OPERAND_P (OP) \
+ && !PIC_ADDR_P (OP) \
+ && GET_CODE (OP) != LABEL_REF)))
+
+/* Check whether OP is a datalabel unspec. */
+#define DATALABEL_REF_NO_CONST_P(OP) \
+ (GET_CODE (OP) == UNSPEC \
+ && XINT ((OP), 1) == UNSPEC_DATALABEL \
+ && XVECLEN ((OP), 0) == 1 \
+ && GET_CODE (XVECEXP ((OP), 0, 0)) == LABEL_REF)
+
+#define GOT_ENTRY_P(OP) \
+ (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_GOT)
+
+#define GOTPLT_ENTRY_P(OP) \
+ (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_GOTPLT)
+
+#define UNSPEC_GOTOFF_P(OP) \
+ (GET_CODE (OP) == UNSPEC && XINT ((OP), 1) == UNSPEC_GOTOFF)
+
+#define GOTOFF_P(OP) \
+ (GET_CODE (OP) == CONST \
+ && (UNSPEC_GOTOFF_P (XEXP ((OP), 0)) \
+ || (GET_CODE (XEXP ((OP), 0)) == PLUS \
+ && UNSPEC_GOTOFF_P (XEXP (XEXP ((OP), 0), 0)) \
+ && CONST_INT_P (XEXP (XEXP ((OP), 0), 1)))))
+
+#define PIC_ADDR_P(OP) \
+ (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_PIC)
+
+#define PCREL_SYMOFF_P(OP) \
+ (GET_CODE (OP) == CONST \
+ && GET_CODE (XEXP ((OP), 0)) == UNSPEC \
+ && XINT (XEXP ((OP), 0), 1) == UNSPEC_PCREL_SYMOFF)
+
+#define NON_PIC_REFERENCE_P(OP) \
+ (GET_CODE (OP) == LABEL_REF || GET_CODE (OP) == SYMBOL_REF \
+ || (GET_CODE (OP) == CONST \
+ && (GET_CODE (XEXP ((OP), 0)) == LABEL_REF \
+ || GET_CODE (XEXP ((OP), 0)) == SYMBOL_REF \
+ || DATALABEL_REF_NO_CONST_P (XEXP ((OP), 0)))) \
+ || (GET_CODE (OP) == CONST && GET_CODE (XEXP ((OP), 0)) == PLUS \
+ && (GET_CODE (XEXP (XEXP ((OP), 0), 0)) == SYMBOL_REF \
+ || GET_CODE (XEXP (XEXP ((OP), 0), 0)) == LABEL_REF \
+ || DATALABEL_REF_NO_CONST_P (XEXP (XEXP ((OP), 0), 0))) \
+ && CONST_INT_P (XEXP (XEXP ((OP), 0), 1))))
+
+#define PIC_REFERENCE_P(OP) \
+ (GOT_ENTRY_P (OP) || GOTPLT_ENTRY_P (OP) \
+ || GOTOFF_P (OP) || PIC_ADDR_P (OP))
+
+#define MOVI_SHORI_BASE_OPERAND_P(OP) \
+ (flag_pic \
+ ? (GOT_ENTRY_P (OP) || GOTPLT_ENTRY_P (OP) || GOTOFF_P (OP) \
+ || PCREL_SYMOFF_P (OP)) \
+ : NON_PIC_REFERENCE_P (OP))
+
+#define MAYBE_BASE_REGISTER_RTX_P(X, STRICT) \
+ ((REG_P (X) && REG_OK_FOR_BASE_P (X, STRICT)) \
+ || (GET_CODE (X) == SUBREG \
+ && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE ((X))), \
+ GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (X)))) \
+ && REG_P (SUBREG_REG (X)) \
+ && REG_OK_FOR_BASE_P (SUBREG_REG (X), STRICT)))
+
+/* Since this must be r0, which is a single register class, we must check
+ SUBREGs more carefully, to be sure that we don't accept one that extends
+ outside the class. */
+#define MAYBE_INDEX_REGISTER_RTX_P(X, STRICT) \
+ ((REG_P (X) && REG_OK_FOR_INDEX_P (X, STRICT)) \
+ || (GET_CODE (X) == SUBREG \
+ && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE ((X))), \
+ GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (X)))) \
+ && REG_P (SUBREG_REG (X)) \
+ && SUBREG_OK_FOR_INDEX_P (SUBREG_REG (X), SUBREG_BYTE (X), STRICT)))
+
+#ifdef REG_OK_STRICT
+#define BASE_REGISTER_RTX_P(X) MAYBE_BASE_REGISTER_RTX_P(X, true)
+#define INDEX_REGISTER_RTX_P(X) MAYBE_INDEX_REGISTER_RTX_P(X, true)
+#else
+#define BASE_REGISTER_RTX_P(X) MAYBE_BASE_REGISTER_RTX_P(X, false)
+#define INDEX_REGISTER_RTX_P(X) MAYBE_INDEX_REGISTER_RTX_P(X, false)
+#endif
+
+#define ALLOW_INDEXED_ADDRESS \
+ ((!TARGET_SHMEDIA32 && !TARGET_SHCOMPACT) || TARGET_ALLOW_INDEXED_ADDRESS)
+
+/* A C compound statement that attempts to replace X, which is an address
+ that needs reloading, with a valid memory address for an operand of
+ mode MODE. WIN is a C statement label elsewhere in the code. */
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+ do { \
+ if (sh_legitimize_reload_address (&(X), (MODE), (OPNUM), (TYPE))) \
+ goto WIN; \
+ } while (0)
+
+/* Specify the machine mode that this machine uses
+ for the index in the tablejump instruction. */
+#define CASE_VECTOR_MODE ((! optimize || TARGET_BIGTABLE) ? SImode : HImode)
+
+#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
+((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \
+ : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \
+ : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 ? HImode \
+ : SImode)
+
+/* Define this as a C expression which evaluates to nonzero if the tablejump
+   instruction expects the table to contain offsets from the address of the
+   table.
+   Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE 1
+
+/* Define it here, so that it doesn't get bumped to 64-bits on SHmedia. */
+#define FLOAT_TYPE_SIZE 32
+
+/* Since the SH2e has only `float' support, it is desirable to make all
+ floating point types equivalent to `float'. */
+#define DOUBLE_TYPE_SIZE ((TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH2A_DOUBLE)\
+ ? 32 : 64)
+
+/* 'char' is signed by default. */
+#define DEFAULT_SIGNED_CHAR 1
+
+/* The type of size_t is unsigned int (long unsigned int on SH5). */
+#define SIZE_TYPE (TARGET_SH5 ? "long unsigned int" : "unsigned int")
+
+#undef PTRDIFF_TYPE
+#define PTRDIFF_TYPE (TARGET_SH5 ? "long int" : "int")
+
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+#define SH_ELF_WCHAR_TYPE "long int"
+
+/* Max number of bytes we can move from memory to memory
+ in one reasonably fast instruction. */
+#define MOVE_MAX (TARGET_SHMEDIA ? 8 : 4)
+
+/* Maximum value possibly taken by MOVE_MAX. Must be defined whenever
+ MOVE_MAX is not a compile-time constant. */
+#define MAX_MOVE_MAX 8
+
+/* Max number of bytes we want move_by_pieces to be able to copy
+ efficiently. */
+#define MOVE_MAX_PIECES (TARGET_SH4 || TARGET_SHMEDIA ? 8 : 4)
+
+/* Define if operations between registers always perform the operation
+ on the full register even if a narrower mode is specified. */
+#define WORD_REGISTER_OPERATIONS
+
+/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
+ will either zero-extend or sign-extend. The value of this macro should
+ be the code that says which one of the two operations is implicitly
+ done, UNKNOWN if none.
+ For SHmedia, we can truncate to QImode easier using zero extension.
+ FP registers can load SImode values, but don't implicitly sign-extend
+ them to DImode. */
+#define LOAD_EXTEND_OP(MODE) \
+ (((MODE) == QImode && TARGET_SHMEDIA) ? ZERO_EXTEND \
+ : (MODE) != SImode ? SIGN_EXTEND : UNKNOWN)
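+/* For illustration (not part of the original sources): on SH1..SH4 a QImode
+   or HImode load is done with mov.b / mov.w, which sign-extend the loaded
+   value into the full 32-bit register; that is why SIGN_EXTEND is reported
+   for those modes above.  */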
+
+/* Define if loading short immediate values into registers sign extends. */
+#define SHORT_IMMEDIATES_SIGN_EXTEND
+
+/* Nonzero if access to memory by bytes is no faster than for words. */
+#define SLOW_BYTE_ACCESS 1
+
+/* Nonzero if the target supports dynamic shift instructions
+ like shad and shld. */
+#define TARGET_DYNSHIFT (TARGET_SH3 || TARGET_SH2A)
+
+/* The cost of using the dynamic shift insns (shad, shld) is 1 if they
+   are available.  If they are not available, a library function will
+   be emitted instead, which is more expensive. */
+#define SH_DYNAMIC_SHIFT_COST (TARGET_DYNSHIFT ? 1 : 20)
+
+/* Defining SHIFT_COUNT_TRUNCATED tells the combine pass that code like
+ (X << (Y % 32)) for register X, Y is equivalent to (X << Y).
+ This is not generally true when hardware dynamic shifts (shad, shld) are
+ used, because they check the sign bit _before_ the modulo op. The sign
+ bit determines whether it is a left shift or a right shift:
+     if (Y >= 0)
+       return X << (Y & 31);
+     else
+       return X >> ((-Y) & 31);
+
+ The dynamic shift library routines in lib1funcs.S do not use the sign bit
+ like the hardware dynamic shifts and truncate the shift count to 31.
+ We define SHIFT_COUNT_TRUNCATED to 0 and express the implied shift count
+ truncation in the library function call patterns, as this gives slightly
+ more compact code. */
+#define SHIFT_COUNT_TRUNCATED (0)
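+/* For illustration (not part of the original sources): with a shift count
+   of Y = -1, shad/shld shift X right by 1, whereas truncating the count to
+   (Y & 31) == 31 would shift X left by 31.  The results differ, so
+   SHIFT_COUNT_TRUNCATED must be 0 when the hardware shifts are used.  */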
+
+/* All integers have the same format so truncation is easy. */
+/* But SHmedia must sign-extend DImode when truncating to SImode. */
+#define TRULY_NOOP_TRUNCATION(OUTPREC,INPREC) \
+ (!TARGET_SHMEDIA || (INPREC) < 64 || (OUTPREC) >= 64)
+
+/* Define this if addresses of constant functions
+ shouldn't be put through pseudo regs where they can be cse'd.
+ Desirable on machines where ordinary constants are expensive
+ but a CALL with constant address is cheap. */
+/*#define NO_FUNCTION_CSE 1*/
+
+/* The machine modes of pointers and functions. */
+#define Pmode (TARGET_SHMEDIA64 ? DImode : SImode)
+#define FUNCTION_MODE Pmode
+
+/* The multiply insn on the SH1 and the divide insns on the SH1 and SH2
+ are actually function calls with some special constraints on arguments
+ and register usage.
+
+ These macros tell reorg that the references to arguments and
+ register clobbers for insns of type sfunc do not appear to happen
+ until after the millicode call. This allows reorg to put insns
+ which set the argument registers into the delay slot of the millicode
+ call -- thus they act more like traditional CALL_INSNs.
+
+ get_attr_is_sfunc will try to recognize the given insn, so make sure to
+ filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
+ in particular. */
+
+#define INSN_SETS_ARE_DELAYED(X) \
+ ((NONJUMP_INSN_P (X) \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_sfunc (X)))
+
+#define INSN_REFERENCES_ARE_DELAYED(X) \
+ ((NONJUMP_INSN_P (X) \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_sfunc (X)))
+
+
+/* Position Independent Code. */
+
+/* We can't directly access anything that contains a symbol,
+ nor can we indirect via the constant pool. */
+#define LEGITIMATE_PIC_OPERAND_P(X) \
+ ((! nonpic_symbol_mentioned_p (X) \
+ && (GET_CODE (X) != SYMBOL_REF \
+ || ! CONSTANT_POOL_ADDRESS_P (X) \
+ || ! nonpic_symbol_mentioned_p (get_pool_constant (X)))) \
+ || (TARGET_SHMEDIA && GET_CODE (X) == LABEL_REF))
+
+#define SYMBOLIC_CONST_P(X) \
+((GET_CODE (X) == SYMBOL_REF || GET_CODE (X) == LABEL_REF) \
+ && nonpic_symbol_mentioned_p (X))
+
+/* Compute extra cost of moving data between one register class
+ and another. */
+
+/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
+ uses this information. Hence, the general register <-> floating point
+ register information here is not used for SFmode. */
+#define REGCLASS_HAS_GENERAL_REG(CLASS) \
+ ((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS || (CLASS) == NON_SP_REGS \
+ || (! TARGET_SHMEDIA && (CLASS) == SIBCALL_REGS))
+
+#define REGCLASS_HAS_FP_REG(CLASS) \
+ ((CLASS) == FP0_REGS || (CLASS) == FP_REGS \
+ || (CLASS) == DF_REGS)
+
+/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option? This
+ would be so that people with slow memory systems could generate
+ different code that does fewer memory accesses. */
+
+/* A C expression for the cost of a branch instruction. A value of 1
+ is the default; other values are interpreted relative to that. */
+#define BRANCH_COST(speed_p, predictable_p) sh_branch_cost
+
+/* Assembler output control. */
+
+/* A C string constant describing how to begin a comment in the target
+ assembler language. The compiler assumes that the comment will end at
+ the end of the line. */
+#define ASM_COMMENT_START "!"
+
+#define ASM_APP_ON ""
+#define ASM_APP_OFF ""
+#define FILE_ASM_OP "\t.file\n"
+#define SET_ASM_OP "\t.set\t"
+
+/* How to change between sections. */
+#define TEXT_SECTION_ASM_OP (TARGET_SHMEDIA32 \
+ ? "\t.section\t.text..SHmedia32,\"ax\"" \
+ : "\t.text")
+#define DATA_SECTION_ASM_OP "\t.data"
+
+#if defined CRT_BEGIN || defined CRT_END
+/* Arrange for TEXT_SECTION_ASM_OP to be a compile-time constant. */
+# undef TEXT_SECTION_ASM_OP
+# if __SHMEDIA__ == 1 && __SH5__ == 32
+# define TEXT_SECTION_ASM_OP "\t.section\t.text..SHmedia32,\"ax\""
+# else
+# define TEXT_SECTION_ASM_OP "\t.text"
+# endif
+#endif
+
+#ifndef BSS_SECTION_ASM_OP
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+#endif
+
+#ifndef ASM_OUTPUT_ALIGNED_BSS
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+ asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
+#endif
+
+/* Define this so that jump tables go in the same section as the current
+   function, which could be text or a user-defined section. */
+#define JUMP_TABLES_IN_TEXT_SECTION 1
+
+#undef DO_GLOBAL_CTORS_BODY
+#define DO_GLOBAL_CTORS_BODY \
+{ \
+ typedef void (*pfunc) (void); \
+ extern pfunc __ctors[]; \
+ extern pfunc __ctors_end[]; \
+ pfunc *p; \
+ for (p = __ctors_end; p > __ctors; ) \
+ { \
+ (*--p)(); \
+ } \
+}
+
+#undef DO_GLOBAL_DTORS_BODY
+#define DO_GLOBAL_DTORS_BODY \
+{ \
+ typedef void (*pfunc) (void); \
+ extern pfunc __dtors[]; \
+ extern pfunc __dtors_end[]; \
+ pfunc *p; \
+ for (p = __dtors; p < __dtors_end; p++) \
+ { \
+ (*p)(); \
+ } \
+}
+
+#define ASM_OUTPUT_REG_PUSH(file, v) \
+{ \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf ((file), "\taddi.l\tr15,-8,r15\n"); \
+ fprintf ((file), "\tst.q\tr15,0,r%d\n", (v)); \
+ } \
+ else \
+ fprintf ((file), "\tmov.l\tr%d,@-r15\n", (v)); \
+}
+
+#define ASM_OUTPUT_REG_POP(file, v) \
+{ \
+ if (TARGET_SHMEDIA) \
+ { \
+ fprintf ((file), "\tld.q\tr15,0,r%d\n", (v)); \
+ fprintf ((file), "\taddi.l\tr15,8,r15\n"); \
+ } \
+ else \
+ fprintf ((file), "\tmov.l\t@r15+,r%d\n", (v)); \
+}
+
+/* DBX register number for a given compiler register number. */
+/* GDB has FPUL at 23 and FP0 at 25, so we must add one to all FP registers
+ to match gdb. */
+/* expand_builtin_init_dwarf_reg_sizes uses this to test if a
+ register exists, so we should return -1 for invalid register numbers. */
+#define DBX_REGISTER_NUMBER(REGNO) SH_DBX_REGISTER_NUMBER (REGNO)
+
+/* SHcompact PR_REG used to use the encoding 241, and SHcompact FP registers
+ used to use the encodings 245..260, but that doesn't make sense:
+ PR_REG and PR_MEDIA_REG are actually the same register, and likewise
+ the FP registers stay the same when switching between compact and media
+ mode. Hence, we also need to use the same dwarf frame columns.
+ Likewise, we need to support unwind information for SHmedia registers
+ even in compact code. */
+#define SH_DBX_REGISTER_NUMBER(REGNO) \
+ (IN_RANGE ((REGNO), \
+ (unsigned HOST_WIDE_INT) FIRST_GENERAL_REG, \
+ FIRST_GENERAL_REG + (TARGET_SH5 ? 63U :15U)) \
+ ? ((unsigned) (REGNO) - FIRST_GENERAL_REG) \
+ : ((int) (REGNO) >= FIRST_FP_REG \
+ && ((int) (REGNO) \
+ <= (FIRST_FP_REG + \
+ ((TARGET_SH5 && TARGET_FPU_ANY) ? 63 : TARGET_SH2E ? 15 : -1)))) \
+ ? ((unsigned) (REGNO) - FIRST_FP_REG \
+ + (TARGET_SH5 ? 77 : 25)) \
+ : XD_REGISTER_P (REGNO) \
+ ? ((unsigned) (REGNO) - FIRST_XD_REG + (TARGET_SH5 ? 289 : 87)) \
+ : TARGET_REGISTER_P (REGNO) \
+ ? ((unsigned) (REGNO) - FIRST_TARGET_REG + 68) \
+ : (REGNO) == PR_REG \
+ ? (TARGET_SH5 ? 18 : 17) \
+ : (REGNO) == PR_MEDIA_REG \
+ ? (TARGET_SH5 ? 18 : (unsigned) -1) \
+ : (REGNO) == GBR_REG \
+ ? (TARGET_SH5 ? 238 : 18) \
+ : (REGNO) == MACH_REG \
+ ? (TARGET_SH5 ? 239 : 20) \
+ : (REGNO) == MACL_REG \
+ ? (TARGET_SH5 ? 240 : 21) \
+ : (REGNO) == T_REG \
+ ? (TARGET_SH5 ? 242 : 22) \
+ : (REGNO) == FPUL_REG \
+ ? (TARGET_SH5 ? 244 : 23) \
+ : (REGNO) == FPSCR_REG \
+ ? (TARGET_SH5 ? 243 : 24) \
+ : (unsigned) -1)
+
+/* This is how to output a reference to a symbol_ref. On SH5,
+ references to non-code symbols must be preceded by `datalabel'. */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+ do \
+ { \
+ if (TARGET_SH5 && !SYMBOL_REF_FUNCTION_P (SYM)) \
+ fputs ("datalabel ", (FILE)); \
+ assemble_name ((FILE), XSTR ((SYM), 0)); \
+ } \
+ while (0)
+
+/* This is how to output an assembler line
+ that says to advance the location counter
+ to a multiple of 2**LOG bytes. */
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf ((FILE), "\t.align %d\n", (LOG))
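+/* For illustration (not part of the original sources): ASM_OUTPUT_ALIGN
+   with LOG == 2 emits "\t.align 2", advancing the location counter to a
+   4-byte (2**2) boundary.  */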
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+/* #define ASM_OUTPUT_CASE_END(STREAM,NUM,TABLE) */
+
+/* Output a relative address table. */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL) \
+ switch (GET_MODE (BODY)) \
+ { \
+ case SImode: \
+ if (TARGET_SH5) \
+ { \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d-datalabel %LL%d\n", \
+ (VALUE), (REL)); \
+ break; \
+ } \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ case HImode: \
+ if (TARGET_SH5) \
+ { \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d-datalabel %LL%d\n", \
+ (VALUE), (REL)); \
+ break; \
+ } \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ case QImode: \
+ if (TARGET_SH5) \
+ { \
+ asm_fprintf ((STREAM), "\t.byte\t%LL%d-datalabel %LL%d\n", \
+ (VALUE), (REL)); \
+ break; \
+ } \
+ asm_fprintf ((STREAM), "\t.byte\t%LL%d-%LL%d\n", (VALUE),(REL)); \
+ break; \
+ default: \
+ break; \
+ }
+
+/* Output an absolute table element. */
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE) \
+ if (! optimize || TARGET_BIGTABLE) \
+ asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); \
+ else \
+ asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE));
+
+
+/* A C statement to be executed just prior to the output of
+ assembler code for INSN, to modify the extracted operands so
+ they will be output differently.
+
+ Here the argument OPVEC is the vector containing the operands
+ extracted from INSN, and NOPERANDS is the number of elements of
+ the vector which contain meaningful data for this insn.
+ The contents of this vector are what will be used to convert the insn
+ template into assembler code, so you can change the assembler output
+ by changing the contents of the vector. */
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
+ final_prescan_insn ((INSN), (OPVEC), (NOPERANDS))
+
+
+extern rtx sh_compare_op0;
+extern rtx sh_compare_op1;
+
+/* Which processor to schedule for. The elements of the enumeration must
+ match exactly the cpu attribute in the sh.md file. */
+enum processor_type {
+ PROCESSOR_SH1,
+ PROCESSOR_SH2,
+ PROCESSOR_SH2E,
+ PROCESSOR_SH2A,
+ PROCESSOR_SH3,
+ PROCESSOR_SH3E,
+ PROCESSOR_SH4,
+ PROCESSOR_SH4A,
+ PROCESSOR_SH5
+};
+
+#define sh_cpu_attr ((enum attr_cpu)sh_cpu)
+extern enum processor_type sh_cpu;
+
+enum mdep_reorg_phase_e
+{
+ SH_BEFORE_MDEP_REORG,
+ SH_INSERT_USES_LABELS,
+ SH_SHORTEN_BRANCHES0,
+ SH_FIXUP_PCLOAD,
+ SH_SHORTEN_BRANCHES1,
+ SH_AFTER_MDEP_REORG
+};
+
+extern enum mdep_reorg_phase_e mdep_reorg_phase;
+
+/* Handle Renesas compiler's pragmas. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "interrupt", sh_pr_interrupt); \
+ c_register_pragma (0, "trapa", sh_pr_trapa); \
+ c_register_pragma (0, "nosave_low_regs", sh_pr_nosave_low_regs); \
+} while (0)
+
+extern tree sh_deferred_function_attributes;
+extern tree *sh_deferred_function_attributes_tail;
+
+/* Set when processing a function with interrupt attribute. */
+extern int current_function_interrupt;
+
+
+/* Instructions with unfilled delay slots take up an
+ extra two bytes for the nop in the delay slot.
+ sh-dsp parallel processing insns are four bytes long. */
+#define ADJUST_INSN_LENGTH(X, LENGTH) \
+ (LENGTH) += sh_insn_length_adjustment (X);
+
+/* Define this macro if it is advisable to hold scalars in registers
+ in a wider mode than that declared by the program. In such cases,
+ the value is constrained to be within the bounds of the declared
+ type, but kept valid in the wider mode. The signedness of the
+ extension may differ from that of the type.
+
+ Leaving the unsignedp unchanged gives better code than always setting it
+ to 0. This is despite the fact that we have only signed char and short
+ load instructions. */
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if (GET_MODE_CLASS (MODE) == MODE_INT \
+ && GET_MODE_SIZE (MODE) < 4/* ! UNITS_PER_WORD */)\
+ (UNSIGNEDP) = ((MODE) == SImode ? 0 : (UNSIGNEDP)), \
+ (MODE) = (TARGET_SH1 ? SImode \
+ : TARGET_SHMEDIA32 ? SImode : DImode);
+
+#define MAX_FIXED_MODE_SIZE (TARGET_SH5 ? 128 : 64)
+
+/* It is better to allocate the maximum space for outgoing args once in the
+   prologue rather than duplicating the allocation around each call. */
+#define ACCUMULATE_OUTGOING_ARGS TARGET_ACCUMULATE_OUTGOING_ARGS
+
+#define NUM_MODES_FOR_MODE_SWITCHING { FP_MODE_NONE }
+
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+
+#define ACTUAL_NORMAL_MODE(ENTITY) \
+ (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE)
+
+#define NORMAL_MODE(ENTITY) \
+ (sh_cfun_interrupt_handler_p () \
+ ? (TARGET_FMOVD ? FP_MODE_DOUBLE : FP_MODE_NONE) \
+ : ACTUAL_NORMAL_MODE (ENTITY))
+
+#define MODE_ENTRY(ENTITY) NORMAL_MODE (ENTITY)
+
+#define MODE_EXIT(ENTITY) \
+ (sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (ENTITY))
+
+#define EPILOGUE_USES(REGNO) ((TARGET_SH2E || TARGET_SH4) \
+ && (REGNO) == FPSCR_REG)
+
+#define MODE_NEEDED(ENTITY, INSN) \
+ (recog_memoized (INSN) >= 0 \
+ ? get_attr_fp_mode (INSN) \
+ : FP_MODE_NONE)
+
+#define MODE_AFTER(ENTITY, MODE, INSN) \
+ (TARGET_HITACHI \
+ && recog_memoized (INSN) >= 0 \
+ && get_attr_fp_set (INSN) != FP_SET_NONE \
+ ? (int) get_attr_fp_set (INSN) \
+ : (MODE))
+
+#define MODE_PRIORITY_TO_MODE(ENTITY, N) \
+ ((TARGET_FPU_SINGLE != 0) ^ (N) ? FP_MODE_SINGLE : FP_MODE_DOUBLE)
+
+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
+ fpscr_set_from_mem ((MODE), (HARD_REGS_LIVE))
+
+#define MD_CAN_REDIRECT_BRANCH(INSN, SEQ) \
+ sh_can_redirect_branch ((INSN), (SEQ))
+
+#define DWARF_FRAME_RETURN_COLUMN \
+ (TARGET_SH5 ? DWARF_FRAME_REGNUM (PR_MEDIA_REG) : DWARF_FRAME_REGNUM (PR_REG))
+
+#define EH_RETURN_DATA_REGNO(N) \
+ ((N) < 4 ? (N) + (TARGET_SH5 ? 2U : 4U) : INVALID_REGNUM)
+
+#define EH_RETURN_STACKADJ_REGNO STATIC_CHAIN_REGNUM
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, EH_RETURN_STACKADJ_REGNO)
+
+/* We have to distinguish between code and data, so that we apply
+ datalabel where and only where appropriate. Use sdataN for data. */
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ ((flag_pic && (GLOBAL) ? DW_EH_PE_indirect : 0) \
+ | (flag_pic ? DW_EH_PE_pcrel : DW_EH_PE_absptr) \
+ | ((CODE) ? 0 : (TARGET_SHMEDIA64 ? DW_EH_PE_sdata8 : DW_EH_PE_sdata4)))
+
+/* Handle special EH pointer encodings. Absolute, pc-relative, and
+ indirect are handled automatically. */
+#define ASM_MAYBE_OUTPUT_ENCODED_ADDR_RTX(FILE, ENCODING, SIZE, ADDR, DONE) \
+ do { \
+ if (((ENCODING) & 0xf) != DW_EH_PE_sdata4 \
+ && ((ENCODING) & 0xf) != DW_EH_PE_sdata8) \
+ { \
+ gcc_assert (GET_CODE (ADDR) == SYMBOL_REF); \
+ SYMBOL_REF_FLAGS (ADDR) |= SYMBOL_FLAG_FUNCTION; \
+ if (0) goto DONE; \
+ } \
+ } while (0)
+
+#if (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__
+/* The SH constant pool breaks the mechanisms used in crtstuff.c to control
+   the section in which code resides.  We have to write it as asm code. */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\
+ mov.l 1f,r1\n\
+ mova 2f,r0\n\
+ braf r1\n\
+ lds r0,pr\n\
+0: .p2align 2\n\
+1: .long " USER_LABEL_PREFIX #FUNC " - 0b\n\
+2:\n" TEXT_SECTION_ASM_OP);
+#endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */
+
+#endif /* ! GCC_SH_H */
diff --git a/gcc-4.9/gcc/config/sh/sh.md b/gcc-4.9/gcc/config/sh/sh.md
new file mode 100644
index 000000000..ab1f0a51c
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh.md
@@ -0,0 +1,15960 @@
+;;- Machine description for Renesas / SuperH SH.
+;; Copyright (C) 1993-2014 Free Software Foundation, Inc.
+;; Contributed by Steve Chamberlain (sac@cygnus.com).
+;; Improved by Jim Wilson (wilson@cygnus.com).
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; ??? Should prepend a * to all pattern names which are not used.
+;; This will make the compiler smaller, and rebuilds after changes faster.
+
+;; ??? Should be enhanced to include support for many more GNU superoptimizer
+;; sequences. Especially the sequences for arithmetic right shifts.
+
+;; ??? Should check all DImode patterns for consistency and usefulness.
+
+;; ??? The MAC.W and MAC.L instructions are not supported. There is no
+;; way to generate them.
+
+;; BSR is not generated by the compiler proper, but when relaxing, it
+;; generates .uses pseudo-ops that allow linker relaxation to create
+;; BSR. This is actually implemented in bfd/{coff,elf32}-sh.c
+
+;; Special constraints for SH machine description:
+;;
+;; t -- T
+;; x -- mac
+;; l -- pr
+;; z -- r0
+;;
+;; Special formats used for outputting SH instructions:
+;;
+;; %. -- print a .s if insn needs delay slot
+;; %@ -- print rte/rts if is/isn't an interrupt function
+;; %# -- output a nop if there is nothing to put in the delay slot
+;; %O -- print a constant without the #
+;; %R -- print the lsw reg of a double
+;; %S -- print the msw reg of a double
+;; %T -- print next word of a double REG or MEM
+;;
+;; Special predicates:
+;;
+;; arith_operand -- operand is valid source for arithmetic op
+;; arith_reg_operand -- operand is valid register for arithmetic op
+;; general_movdst_operand -- operand is valid move destination
+;; general_movsrc_operand -- operand is valid move source
+;; logical_operand -- operand is valid source for logical op
+
+;; -------------------------------------------------------------------------
+;; Constants
+;; -------------------------------------------------------------------------
+
+(define_constants [
+ (AP_REG 145)
+ (PR_REG 146)
+ (T_REG 147)
+ (GBR_REG 144)
+ (MACH_REG 148)
+ (MACL_REG 149)
+ (FPUL_REG 150)
+ (RAP_REG 152)
+
+ (FPSCR_REG 151)
+
+ (PIC_REG 12)
+ (FP_REG 14)
+ (SP_REG 15)
+
+ (PR_MEDIA_REG 18)
+ (T_MEDIA_REG 19)
+
+ (R0_REG 0)
+ (R1_REG 1)
+ (R2_REG 2)
+ (R3_REG 3)
+ (R4_REG 4)
+ (R5_REG 5)
+ (R6_REG 6)
+ (R7_REG 7)
+ (R8_REG 8)
+ (R9_REG 9)
+ (R10_REG 10)
+ (R20_REG 20)
+ (R21_REG 21)
+ (R22_REG 22)
+ (R23_REG 23)
+
+ (DR0_REG 64)
+ (DR2_REG 66)
+ (DR4_REG 68)
+ (FR23_REG 87)
+
+ (TR0_REG 128)
+ (TR1_REG 129)
+ (TR2_REG 130)
+
+ (XD0_REG 136)
+
+ ;; These are used with unspec.
+ (UNSPEC_COMPACT_ARGS 0)
+ (UNSPEC_MOVA 1)
+ (UNSPEC_CASESI 2)
+ (UNSPEC_DATALABEL 3)
+ (UNSPEC_BBR 4)
+ (UNSPEC_SFUNC 5)
+ (UNSPEC_PIC 6)
+ (UNSPEC_GOT 7)
+ (UNSPEC_GOTOFF 8)
+ (UNSPEC_PLT 9)
+ (UNSPEC_CALLER 10)
+ (UNSPEC_GOTPLT 11)
+ (UNSPEC_ICACHE 12)
+ (UNSPEC_INIT_TRAMP 13)
+ (UNSPEC_FCOSA 14)
+ (UNSPEC_FSRRA 15)
+ (UNSPEC_FSINA 16)
+ (UNSPEC_NSB 17)
+ (UNSPEC_ALLOCO 18)
+ (UNSPEC_TLSGD 20)
+ (UNSPEC_TLSLDM 21)
+ (UNSPEC_TLSIE 22)
+ (UNSPEC_DTPOFF 23)
+ (UNSPEC_GOTTPOFF 24)
+ (UNSPEC_TPOFF 25)
+ (UNSPEC_RA 26)
+ (UNSPEC_DIV_INV_M0 30)
+ (UNSPEC_DIV_INV_M1 31)
+ (UNSPEC_DIV_INV_M2 32)
+ (UNSPEC_DIV_INV_M3 33)
+ (UNSPEC_DIV_INV20 34)
+ (UNSPEC_DIV_INV_TABLE 37)
+ (UNSPEC_ASHIFTRT 35)
+ (UNSPEC_THUNK 36)
+ (UNSPEC_CHKADD 38)
+ (UNSPEC_SP_SET 40)
+ (UNSPEC_SP_TEST 41)
+ (UNSPEC_MOVUA 42)
+
+ ;; (unspec [VAL SHIFT] UNSPEC_EXTRACT_S16) computes (short) (VAL >> SHIFT).
+ ;; UNSPEC_EXTRACT_U16 is the unsigned equivalent.
+ (UNSPEC_EXTRACT_S16 43)
+ (UNSPEC_EXTRACT_U16 44)
+
+ ;; (unspec [TARGET ANCHOR] UNSPEC_SYMOFF) == TARGET - ANCHOR.
+ (UNSPEC_SYMOFF 45)
+
+ ;; (unspec [OFFSET ANCHOR] UNSPEC_PCREL_SYMOFF) == OFFSET - (ANCHOR - .).
+ (UNSPEC_PCREL_SYMOFF 46)
+
+ ;; Misc builtins
+ (UNSPEC_BUILTIN_STRLEN 47)
+
+ ;; These are used with unspec_volatile.
+ (UNSPECV_BLOCKAGE 0)
+ (UNSPECV_ALIGN 1)
+ (UNSPECV_CONST2 2)
+ (UNSPECV_CONST4 4)
+ (UNSPECV_CONST8 6)
+ (UNSPECV_WINDOW_END 10)
+ (UNSPECV_CONST_END 11)
+ (UNSPECV_EH_RETURN 12)
+ (UNSPECV_GBR 13)
+ (UNSPECV_SP_SWITCH_B 14)
+ (UNSPECV_SP_SWITCH_E 15)
+])
+
+;; -------------------------------------------------------------------------
+;; Attributes
+;; -------------------------------------------------------------------------
+
+;; Target CPU.
+
+(define_attr "cpu"
+ "sh1,sh2,sh2e,sh2a,sh3,sh3e,sh4,sh4a,sh5"
+ (const (symbol_ref "sh_cpu_attr")))
+
+(define_attr "endian" "big,little"
+ (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN")
+ (const_string "little") (const_string "big"))))
+
+;; Indicate if the default fpu mode is single precision.
+(define_attr "fpu_single" "yes,no"
+ (const (if_then_else (symbol_ref "TARGET_FPU_SINGLE")
+ (const_string "yes") (const_string "no"))))
+
+(define_attr "fmovd" "yes,no"
+ (const (if_then_else (symbol_ref "TARGET_FMOVD")
+ (const_string "yes") (const_string "no"))))
+;; pipeline model
+(define_attr "pipe_model" "sh1,sh4,sh5media"
+ (const
+ (cond [(symbol_ref "TARGET_SHMEDIA") (const_string "sh5media")
+ (symbol_ref "TARGET_SUPERSCALAR") (const_string "sh4")]
+ (const_string "sh1"))))
+
+;; cbranch conditional branch instructions
+;; jump unconditional jumps
+;; arith ordinary arithmetic
+;; arith3 a compound insn that behaves similarly to a sequence of
+;; three insns of type arith
+;; arith3b like above, but might end with a redirected branch
+;; load from memory
+;; load_si Likewise, SImode variant for general register.
+;; fload Likewise, but load to fp register.
+;; store to memory
+;; fstore floating point register to memory
+;; move general purpose register to register
+;; movi8 8-bit immediate to general purpose register
+;; mt_group other sh4 mt instructions
+;; fmove register to register, floating point
+;; smpy word precision integer multiply
+;; dmpy longword or doublelongword precision integer multiply
+;; return rts
+;; pload load of pr reg, which can't be put into delay slot of rts
+;; prset copy register to pr reg, ditto
+;; pstore store of pr reg, which can't be put into delay slot of jsr
+;; prget copy pr to register, ditto
+;; pcload pc relative load of constant value
+;; pcfload Likewise, but load to fp register.
+;; pcload_si Likewise, SImode variant for general register.
+;; rte return from exception
+;; sfunc special function call with known used registers
+;; call function call
+;; fp floating point
+;; fpscr_toggle toggle a bit in the fpscr
+;; fdiv floating point divide (or square root)
+;; gp_fpul move from general purpose register to fpul
+;; fpul_gp move from fpul to general purpose register
+;; mac_gp move from mac[lh] to general purpose register
+;; gp_mac move from general purpose register to mac[lh]
+;; mac_mem move from mac[lh] to memory
+;; mem_mac move from memory to mac[lh]
+;; dfp_arith,dfp_mul, fp_cmp,dfp_cmp,dfp_conv
+;; ftrc_s fix_truncsfsi2_i4
+;; dfdiv double precision floating point divide (or square root)
+;; cwb ic_invalidate_line_i
+;; movua SH4a unaligned load
+;; fsrra square root reciprocal approximate
+;; fsca sine and cosine approximate
+;; tls_load load TLS related address
+;; arith_media SHmedia arithmetic, logical, and shift instructions
+;; cbranch_media SHmedia conditional branch instructions
+;; cmp_media SHmedia compare instructions
+;; dfdiv_media SHmedia double precision divide and square root
+;; dfmul_media SHmedia double precision multiply instruction
+;; dfparith_media SHmedia double precision floating point arithmetic
+;; dfpconv_media SHmedia double precision floating point conversions
+;; dmpy_media SHmedia longword multiply
+;; fcmp_media SHmedia floating point compare instructions
+;; fdiv_media SHmedia single precision divide and square root
+;; fload_media SHmedia floating point register load instructions
+;; fmove_media SHmedia floating point register moves (inc. fabs and fneg)
+;; fparith_media SHmedia single precision floating point arithmetic
+;; fpconv_media SHmedia single precision floating point conversions
+;; fstore_media SHmedia floating point register store instructions
+;; gettr_media SHmedia gettr instruction
+;; invalidate_line_media SHmedia invalidate_line sequence
+;; jump_media SHmedia unconditional branch instructions
+;; load_media SHmedia general register load instructions
+;; pt_media SHmedia pt instruction (expanded by assembler)
+;; ptabs_media SHmedia ptabs instruction
+;; store_media SHmedia general register store instructions
+;; mcmp_media SHmedia multimedia compare, absolute, saturating ops
+;; mac_media SHmedia mac-style fixed point operations
+;; d2mpy_media SHmedia: two 32-bit integer multiplies
+;; atrans_media SHmedia approximate transcendental functions
+;; ustore_media SHmedia unaligned stores
+;; nil no-op move, will be deleted.
+
+(define_attr "type"
+ "mt_group,cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,load,load_si,
+ fload,store,fstore,move,movi8,fmove,smpy,dmpy,return,pload,prset,pstore,
+ prget,pcload,pcload_si,pcfload,rte,sfunc,call,fp,fpscr_toggle,fdiv,ftrc_s,
+ dfp_arith,dfp_mul,fp_cmp,dfp_cmp,dfp_conv,dfdiv,gp_fpul,fpul_gp,mac_gp,
+ gp_mac,mac_mem,mem_mac,mem_fpscr,gp_fpscr,cwb,movua,fsrra,fsca,tls_load,
+ arith_media,cbranch_media,cmp_media,dfdiv_media,dfmul_media,dfparith_media,
+ dfpconv_media,dmpy_media,fcmp_media,fdiv_media,fload_media,fmove_media,
+ fparith_media,fpconv_media,fstore_media,gettr_media,invalidate_line_media,
+ jump_media,load_media,pt_media,ptabs_media,store_media,mcmp_media,mac_media,
+ d2mpy_media,atrans_media,ustore_media,nil,other"
+ (const_string "other"))
+
+;; We define a new attribute named "insn_class".  We use
+;; this for the DFA based pipeline description.
+;;
+;; mt_group SH4 "mt" group instructions.
+;;
+;; ex_group SH4 "ex" group instructions.
+;;
+;; ls_group SH4 "ls" group instructions.
+;;
+(define_attr "insn_class"
+ "mt_group,ex_group,ls_group,br_group,fe_group,co_group,none"
+ (cond [(eq_attr "type" "move,mt_group") (const_string "mt_group")
+ (eq_attr "type" "movi8,arith,dyn_shift") (const_string "ex_group")
+ (eq_attr "type" "fmove,load,pcload,load_si,pcload_si,fload,pcfload,
+ store,fstore,gp_fpul,fpul_gp") (const_string "ls_group")
+ (eq_attr "type" "cbranch,jump") (const_string "br_group")
+ (eq_attr "type" "fp,fp_cmp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv")
+ (const_string "fe_group")
+ (eq_attr "type" "jump_ind,smpy,dmpy,mac_gp,return,pload,prset,pstore,
+ prget,rte,sfunc,call,dfp_cmp,mem_fpscr,gp_fpscr,cwb,
+ gp_mac,mac_mem,mem_mac") (const_string "co_group")]
+ (const_string "none")))
+
+;; nil are zero instructions, and arith3 / arith3b are multiple instructions,
+;; so these do not belong in an insn group, although they are modeled
+;; with their own define_insn_reservations.
+
+;; Indicate what precision must be selected in fpscr for this insn, if any.
+(define_attr "fp_mode" "single,double,none" (const_string "none"))
+
+;; Indicate if the fpu mode is set by this instruction
+;; "unknown" must have the value as "none" in fp_mode, and means
+;; that the instruction/abi has left the processor in an unknown
+;; state.
+;; "none" means that nothing has changed and no mode is set.
+;; This attribute is only used for the Renesas ABI.
+(define_attr "fp_set" "single,double,unknown,none" (const_string "none"))
+
+; If a conditional branch destination is within -252..258 bytes away
+; from the instruction it can be 2 bytes long. Something in the
+; range -4090..4100 bytes can be 6 bytes long. All other conditional
+; branches are initially assumed to be 16 bytes long.
+; In machine_dependent_reorg, we split all branches that are longer than
+; 2 bytes.
+
+;; The maximum range used for SImode constant pool entries is 1018. A final
+;; instruction can add 8 bytes while only being 4 bytes in size, thus we
+;; can have a total of 1022 bytes in the pool. Add 4 bytes for a branch
+;; instruction around the pool table, 2 bytes of alignment before the table,
+;; and 30 bytes of alignment after the table. That gives a maximum total
+;; pool size of 1058 bytes.
+;; Worst case code/pool content size ratio is 1:2 (using asms).
+;; Thus, in the worst case, there is one instruction in front of a maximum
+;; sized pool, and then there are 1052 bytes of pool for every 508 bytes of
+;; code. For the last n bytes of code, there are 2n + 36 bytes of pool.
+;; If we have a forward branch, the initial table will be put after the
+;; unconditional branch.
+;;
+;; ??? We could do much better by keeping track of the actual pcloads within
+;; the branch range and in the pcload range in front of the branch range.
+
+;; ??? This looks ugly because genattrtab won't allow if_then_else or cond
+;; inside an le.
+(define_attr "short_cbranch_p" "no,yes"
+ (cond [(match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 506))
+ (const_string "yes")
+ (match_test "NEXT_INSN (PREV_INSN (insn)) != insn")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 252)) (const_int 508))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "med_branch_p" "no,yes"
+ (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 990))
+ (const_int 1988))
+ (const_string "yes")
+ (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 4092))
+ (const_int 8186))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "med_cbranch_p" "no,yes"
+ (cond [(leu (plus (minus (match_dup 0) (pc)) (const_int 988))
+ (const_int 1986))
+ (const_string "yes")
+ (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 4090))
+ (const_int 8184))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "braf_branch_p" "no,yes"
+ (cond [(match_test "! TARGET_SH2")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 10330))
+ (const_int 20660))
+ (const_string "yes")
+ (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 32764))
+ (const_int 65530))
+ (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "braf_cbranch_p" "no,yes"
+ (cond [(match_test "! TARGET_SH2")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 10328))
+ (const_int 20658))
+ (const_string "yes")
+ (match_test "mdep_reorg_phase <= SH_FIXUP_PCLOAD")
+ (const_string "no")
+ (leu (plus (minus (match_dup 0) (pc)) (const_int 32762))
+ (const_int 65528))
+ (const_string "yes")
+ ] (const_string "no")))
+
+;; An unconditional jump in the range -4092..4098 can be 2 bytes long.
+;; For wider ranges, we need a combination of a code and a data part.
+;; If we can get a scratch register for a long range jump, the code
+;; part can be 4 bytes long; otherwise, it must be 8 bytes long.
+;; If the jump is in the range -32764..32770, the data part can be 2 bytes
+;; long; otherwise, it must be 6 bytes long.
+
+;; All other instructions are two bytes long by default.
+
+;; ??? This should use something like *branch_p (minus (match_dup 0) (pc)),
+;; but genattrtab doesn't understand this.
+(define_attr "length" ""
+ (cond [(eq_attr "type" "cbranch")
+ (cond [(eq_attr "short_cbranch_p" "yes")
+ (const_int 2)
+ (eq_attr "med_cbranch_p" "yes")
+ (const_int 6)
+ (eq_attr "braf_cbranch_p" "yes")
+ (const_int 12)
+;; ??? using pc is not computed transitively.
+ (ne (match_dup 0) (match_dup 0))
+ (const_int 14)
+ (match_test "flag_pic")
+ (const_int 24)
+ ] (const_int 16))
+ (eq_attr "type" "jump")
+ (cond [(eq_attr "med_branch_p" "yes")
+ (const_int 2)
+ (and (match_test "prev_nonnote_insn (insn)")
+ (and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))")
+ (symbol_ref "INSN"))
+ (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
+ (symbol_ref "code_for_indirect_jump_scratch"))))
+ (cond [(eq_attr "braf_branch_p" "yes")
+ (const_int 6)
+ (not (match_test "flag_pic"))
+ (const_int 10)
+ (match_test "TARGET_SH2")
+ (const_int 10)] (const_int 18))
+ (eq_attr "braf_branch_p" "yes")
+ (const_int 10)
+;; ??? using pc is not computed transitively.
+ (ne (match_dup 0) (match_dup 0))
+ (const_int 12)
+ (match_test "flag_pic")
+ (const_int 22)
+ ] (const_int 14))
+ (eq_attr "type" "pt_media")
+ (if_then_else (match_test "TARGET_SHMEDIA64")
+ (const_int 20) (const_int 12))
+ (and (eq_attr "type" "jump_media")
+ (match_test "TARGET_SH5_CUT2_WORKAROUND"))
+ (const_int 8)
+ ] (if_then_else (match_test "TARGET_SHMEDIA")
+ (const_int 4)
+ (const_int 2))))
+
+;; DFA descriptions for the pipelines
+
+(include "sh1.md")
+(include "shmedia.md")
+(include "sh4.md")
+
+(include "iterators.md")
+(include "predicates.md")
+(include "constraints.md")
+
+;; Definitions for filling delay slots
+
+(define_attr "needs_delay_slot" "yes,no" (const_string "no"))
+
+(define_attr "banked" "yes,no"
+ (cond [(match_test "sh_loads_bankedreg_p (insn)")
+ (const_string "yes")]
+ (const_string "no")))
+
+;; ??? This should be (nil) instead of (const_int 0)
+(define_attr "hit_stack" "yes,no"
+ (cond [(not (match_test "find_regno_note (insn, REG_INC, SP_REG)"))
+ (const_string "no")]
+ (const_string "yes")))
+
+(define_attr "interrupt_function" "no,yes"
+ (const (symbol_ref "current_function_interrupt")))
+
+(define_attr "in_delay_slot" "yes,no"
+ (cond [(eq_attr "type" "cbranch") (const_string "no")
+ (eq_attr "type" "pcload,pcload_si") (const_string "no")
+ (eq_attr "needs_delay_slot" "yes") (const_string "no")
+ (eq_attr "length" "2") (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "cond_delay_slot" "yes,no"
+ (cond [(eq_attr "in_delay_slot" "yes") (const_string "yes")
+ ] (const_string "no")))
+
+(define_attr "is_sfunc" ""
+ (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0)))
+
+(define_attr "is_mac_media" ""
+ (if_then_else (eq_attr "type" "mac_media") (const_int 1) (const_int 0)))
+
+(define_attr "branch_zero" "yes,no"
+ (cond [(eq_attr "type" "!cbranch") (const_string "no")
+ (ne (symbol_ref "(next_active_insn (insn)\
+ == (prev_active_insn\
+ (XEXP (SET_SRC (PATTERN (insn)), 1))))\
+ && get_attr_length (next_active_insn (insn)) == 2")
+ (const_int 0))
+ (const_string "yes")]
+ (const_string "no")))
+
+;; SH4 Double-precision computation with double-precision result -
+;; the two halves are ready at different times.
+(define_attr "dfp_comp" "yes,no"
+ (cond [(eq_attr "type" "dfp_arith,dfp_mul,dfp_conv,dfdiv") (const_string "yes")]
+ (const_string "no")))
+
+;; Insns for which the latency of a preceding fp insn is decreased by one.
+(define_attr "late_fp_use" "yes,no" (const_string "no"))
+;; And the feeding insns for which this is relevant.
+(define_attr "any_fp_comp" "yes,no"
+ (cond [(eq_attr "type" "fp,fdiv,ftrc_s,dfp_arith,dfp_mul,dfp_conv,dfdiv")
+ (const_string "yes")]
+ (const_string "no")))
+
+(define_attr "any_int_load" "yes,no"
+ (cond [(eq_attr "type" "load,load_si,pcload,pcload_si")
+ (const_string "yes")]
+ (const_string "no")))
+
+(define_attr "highpart" "user, ignore, extend, depend, must_split"
+ (const_string "user"))
+
+(define_delay
+ (eq_attr "needs_delay_slot" "yes")
+ [(eq_attr "in_delay_slot" "yes") (nil) (nil)])
+
+;; Since a normal return (rts) implicitly uses the PR register,
+;; we can't allow PR register loads in an rts delay slot.
+;; On the SH1* and SH2*, the rte instruction reads the return pc from the
+;; stack, and thus we can't put a pop instruction in its delay slot.
+;; On the SH3* and SH4*, the rte instruction does not use the stack, so a
+;; pop instruction can go in the delay slot, unless it references a banked
+;; register (the register bank is switched by rte).
+(define_delay
+ (eq_attr "type" "return")
+ [(and (eq_attr "in_delay_slot" "yes")
+ (ior (and (eq_attr "interrupt_function" "no")
+ (eq_attr "type" "!pload,prset"))
+ (and (eq_attr "interrupt_function" "yes")
+ (ior (match_test "TARGET_SH3") (eq_attr "hit_stack" "no"))
+ (eq_attr "banked" "no"))))
+ (nil) (nil)])
+
+;; Since a call implicitly uses the PR register, we can't allow
+;; a PR register store in a jsr delay slot.
+
+(define_delay
+ (ior (eq_attr "type" "call") (eq_attr "type" "sfunc"))
+ [(and (eq_attr "in_delay_slot" "yes")
+ (eq_attr "type" "!pstore,prget")) (nil) (nil)])
+
+;; Say that we have annulled true branches, since this gives smaller and
+;; faster code when branches are predicted as not taken.
+
+;; ??? The non-annulled condition should really be "in_delay_slot",
+;; but insns that can be filled in non-annulled slots get priority over
+;; insns that can only be filled in annulled slots.
+
+(define_delay
+ (and (eq_attr "type" "cbranch")
+ (match_test "TARGET_SH2"))
+ ;; SH2e has a hardware bug that pretty much prohibits the use of
+ ;; annulled delay slots.
+ [(eq_attr "cond_delay_slot" "yes") (and (eq_attr "cond_delay_slot" "yes")
+ (not (eq_attr "cpu" "sh2e"))) (nil)])
+
+;; -------------------------------------------------------------------------
+;; SImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+;; Various patterns to generate the TST #imm, R0 instruction.
+;; Although this adds some pressure on the R0 register, it can potentially
+;; result in faster code, even if the operand has to be moved to R0 first.
+;; This is because on SH4 TST #imm, R0 and MOV Rm, Rn are both MT group
+;; instructions and thus will be executed in parallel. On SH4A TST #imm, R0
+;; is an EX group instruction but still can be executed in parallel with the
+;; MT group MOV Rm, Rn instruction.
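+;;
+;; For illustration (not from the original sources): for C code like
+;;   if (x & 0x22) ...
+;; with x not already in r0, the expected sequence is roughly
+;;   mov   r4,r0
+;;   tst   #34,r0
+;;   bt    .Lskip
+;; where the mov and the tst can be issued in parallel on SH4.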
+
+;; Usual TST #imm, R0 patterns for SI, HI and QI
+;; This is usually used for bit patterns other than contiguous bits
+;; and single bits.
+(define_insn "tstsi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (and:SI (match_operand:SI 0 "logical_operand" "%z,r")
+ (match_operand:SI 1 "logical_operand" "K08,r"))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "tst %1,%0"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "tsthi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (subreg:SI (and:HI (match_operand:HI 0 "logical_operand" "%z")
+ (match_operand 1 "const_int_operand")) 0)
+ (const_int 0)))]
+ "TARGET_SH1
+ && CONST_OK_FOR_K08 (INTVAL (operands[1]))"
+ "tst %1,%0"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "tstqi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (subreg:SI (and:QI (match_operand:QI 0 "logical_operand" "%z")
+ (match_operand 1 "const_int_operand")) 0)
+ (const_int 0)))]
+ "TARGET_SH1
+ && (CONST_OK_FOR_K08 (INTVAL (operands[1]))
+ || CONST_OK_FOR_I08 (INTVAL (operands[1])))"
+{
+ operands[1] = GEN_INT (INTVAL (operands[1]) & 255);
+ return "tst %1,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+;; Test low QI subreg against zero.
+;; This avoids unnecessary zero extension before the test.
+(define_insn "*tstqi_t_zero"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:QI 0 "logical_operand" "z") (const_int 0)))]
+ "TARGET_SH1"
+ "tst #255,%0"
+ [(set_attr "type" "mt_group")])
+
+;; This pattern might be risky because it also tests the upper bits and not
+;; only the subreg. However, it seems that combine will get to this only
+;; when testing sign/zero extended values. In this case the extended upper
+;; bits do not matter.
+(define_insn "*tst<mode>_t_zero"
+ [(set (reg:SI T_REG)
+ (eq:SI
+ (subreg:QIHI
+ (and:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "arith_reg_operand" "r")) <lowpart_le>)
+ (const_int 0)))]
+ "TARGET_SH1 && TARGET_LITTLE_ENDIAN"
+ "tst %0,%1"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "*tst<mode>_t_zero"
+ [(set (reg:SI T_REG)
+ (eq:SI
+ (subreg:QIHI
+ (and:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "arith_reg_operand" "r")) <lowpart_be>)
+ (const_int 0)))]
+ "TARGET_SH1 && TARGET_BIG_ENDIAN"
+ "tst %0,%1"
+ [(set_attr "type" "mt_group")])
+
+;; Extract LSB, negate and store in T bit.
+(define_insn "tstsi_t_and_not"
+ [(set (reg:SI T_REG)
+ (and:SI (not:SI (match_operand:SI 0 "logical_operand" "z"))
+ (const_int 1)))]
+ "TARGET_SH1"
+ "tst #1,%0"
+ [(set_attr "type" "mt_group")])
+
+;; Extract contiguous bits and compare them against zero.
+(define_insn "tst<mode>_t_zero_extract_eq"
+ [(set (reg:SI T_REG)
+ (eq:SI (zero_extract:SI (match_operand:QIHISIDI 0 "logical_operand" "z")
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (const_int 0)))]
+ "TARGET_SH1
+ && CONST_OK_FOR_K08 (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]))"
+{
+ operands[1] = GEN_INT (ZERO_EXTRACT_ANDMASK (operands[1], operands[2]));
+ return "tst %1,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+;; This split is required when testing bits in a QI subreg.
+(define_split
+ [(set (reg:SI T_REG)
+ (eq:SI
+ (if_then_else:SI
+ (zero_extract:SI (match_operand 0 "logical_operand")
+ (match_operand 1 "const_int_operand")
+ (match_operand 2 "const_int_operand"))
+ (match_operand 3 "const_int_operand")
+ (const_int 0))
+ (const_int 0)))]
+ "TARGET_SH1
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3])
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]))"
+ [(set (reg:SI T_REG) (eq:SI (and:SI (match_dup 0) (match_dup 3))
+ (const_int 0)))]
+{
+ if (GET_MODE (operands[0]) == QImode)
+ operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0);
+})
+
+;; Extract single bit, negate and store it in the T bit.
+;; Not used for SH4A.
+(define_insn "tstsi_t_zero_extract_xor"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+ (match_operand:SI 3 "const_int_operand"))
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2]) == INTVAL (operands[3])
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]))"
+ "tst %3,%0"
+ [(set_attr "type" "mt_group")])
+
+;; Extract single bit, negate and store it in the T bit.
+;; Used for SH4A little endian.
+(define_insn "tstsi_t_zero_extract_subreg_xor_little"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI
+ (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+ (match_operand:SI 3 "const_int_operand")) 0)
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1 && TARGET_LITTLE_ENDIAN
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2])
+ == (INTVAL (operands[3]) & 255)
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 255);
+ return "tst %3,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+;; Extract single bit, negate and store it in the T bit.
+;; Used for SH4A big endian.
+(define_insn "tstsi_t_zero_extract_subreg_xor_big"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI
+ (subreg:QI (xor:SI (match_operand:SI 0 "logical_operand" "z")
+ (match_operand:SI 3 "const_int_operand")) 3)
+ (match_operand:SI 1 "const_int_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1 && TARGET_BIG_ENDIAN
+ && ZERO_EXTRACT_ANDMASK (operands[1], operands[2])
+ == (INTVAL (operands[3]) & 255)
+ && CONST_OK_FOR_K08 (INTVAL (operands[3]) & 255)"
+{
+ operands[3] = GEN_INT (INTVAL (operands[3]) & 255);
+ return "tst %3,%0";
+}
+ [(set_attr "type" "mt_group")])
+
+(define_insn "cmpeqsi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SI 0 "arith_reg_operand" "r,z,r")
+ (match_operand:SI 1 "arith_operand" "N,rI08,r")))]
+ "TARGET_SH1"
+ "@
+ tst %0,%0
+ cmp/eq %1,%0
+ cmp/eq %1,%0"
+ [(set_attr "type" "mt_group")])
+
+;; FIXME: For some reason, on SH4A and SH2A combine fails to simplify this
+;; pattern by itself. What this actually does is:
+;; x == 0: (1 >> 0-0) & 1 = 1
+;; x != 0: (1 >> 0-x) & 1 = 0
+;; Without this the test pr51244-8.c fails on SH2A and SH4A.
+(define_insn_and_split "*cmpeqsi_t"
+ [(set (reg:SI T_REG)
+ (and:SI (lshiftrt:SI
+ (const_int 1)
+ (neg:SI (match_operand:SI 0 "arith_reg_operand" "r")))
+ (const_int 1)))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))])
+
+(define_insn "cmpgtsi_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "N,r")))]
+ "TARGET_SH1"
+ "@
+ cmp/pl %0
+ cmp/gt %1,%0"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "cmpgesi_t"
+ [(set (reg:SI T_REG)
+ (ge:SI (match_operand:SI 0 "arith_reg_operand" "r,r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "N,r")))]
+ "TARGET_SH1"
+ "@
+ cmp/pz %0
+ cmp/ge %1,%0"
+ [(set_attr "type" "mt_group")])
+
+;; FIXME: This is actually wrong. There is no way to literally move a
+;; general reg to t reg.  Luckily, it seems that this pattern will only be
+;; used when the general reg is known to be either '0' or '1' during combine.
+;; What we actually need is reg != 0 -> T, but we have only reg == 0 -> T.
+;; Due to interactions with other patterns, combine fails to pick the latter
+;; and invert the dependent logic.
+(define_insn "*negtstsi"
+ [(set (reg:SI T_REG) (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "cmp/pl %0"
+ [(set_attr "type" "mt_group")])
+
+;; Some integer sign comparison patterns can be realized with the div0s insn.
+;; div0s Rm,Rn T = (Rm >> 31) ^ (Rn >> 31)
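+;;
+;; For illustration (not from the original sources): a C condition such as
+;;   if ((a ^ b) < 0)   /* true when the signs of a and b differ */
+;; can be computed with a single "div0s a,b", which leaves the result in
+;; the T bit, instead of an explicit xor and shift sequence.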
+(define_insn "cmp_div0s_0"
+ [(set (reg:SI T_REG)
+ (lshiftrt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "arith_reg_operand" "r"))
+ (const_int 31)))]
+ "TARGET_SH1"
+ "div0s %0,%1"
+ [(set_attr "type" "arith")])
+
+(define_insn "cmp_div0s_1"
+ [(set (reg:SI T_REG)
+ (lt:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "arith_reg_operand" "r"))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "div0s %0,%1"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "*cmp_div0s_0"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (lshiftrt:SI (xor:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" ""))
+ (const_int 31)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG)
+ (lshiftrt:SI (xor:SI (match_dup 1) (match_dup 2)) (const_int 31)))
+ (set (match_dup 0) (reg:SI T_REG))])
+
+(define_insn "*cmp_div0s_0"
+ [(set (reg:SI T_REG)
+ (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand")
+ (const_int 31))
+ (ge:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 0))))]
+ "TARGET_SH1"
+ "div0s %0,%1"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "*cmp_div0s_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ge:SI (xor:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" ""))
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+;; We have to go through the movnegt expander here which will handle the
+;; SH2A vs non-SH2A cases.
+{
+ emit_insn (gen_cmp_div0s_1 (operands[1], operands[2]));
+ emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
+ DONE;
+})
+
+(define_insn_and_split "*cmp_div0s_1"
+ [(set (reg:SI T_REG)
+ (ge:SI (xor:SI (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 0)))
+ (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+
+(define_insn_and_split "*cmp_div0s_1"
+ [(set (reg:SI T_REG)
+ (eq:SI (lshiftrt:SI (match_operand:SI 0 "arith_reg_operand")
+ (const_int 31))
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 31))))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 0)))
+ (set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))])
+
+;; -------------------------------------------------------------------------
+;; SImode compare and branch
+;; -------------------------------------------------------------------------
+
+(define_expand "cbranchsi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1);
+
+ DONE;
+})
+
+;; Combine patterns to invert compare and branch operations for which we
+;; don't have actual comparison insns. These patterns are used in cases
+;; which appear after the initial cbranchsi expansion, which also does
+;; some condition inversion.
+(define_split
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_or_0_operand" ""))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (eq:SI (match_dup 0) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+;; FIXME: Similar to the *cmpeqsi_t pattern above, for some reason, on SH4A
+;; and SH2A combine fails to simplify this pattern by itself.
+;; What this actually does is:
+;; x == 0: (1 >> 0-0) & 1 = 1
+;; x != 0: (1 >> 0-x) & 1 = 0
+;; Without this the test pr51244-8.c fails on SH2A and SH4A.
+(define_split
+ [(set (pc)
+ (if_then_else
+ (eq (and:SI (lshiftrt:SI
+ (const_int 1)
+ (neg:SI (match_operand:SI 0 "arith_reg_operand" "")))
+ (const_int 1))
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])

+
+;; FIXME: These could probably use code iterators for the compare op.
+(define_split
+ [(set (pc)
+ (if_then_else (le (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_or_0_operand" ""))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (gt:SI (match_dup 0) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+(define_split
+ [(set (pc)
+ (if_then_else (lt (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_or_0_operand" ""))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (ge:SI (match_dup 0) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+(define_split
+ [(set (pc)
+ (if_then_else (leu (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (gtu:SI (match_dup 0) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+(define_split
+ [(set (pc)
+ (if_then_else (ltu (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (geu:SI (match_dup 0) (match_dup 1)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+;; Compare and branch combine patterns for div0s comparisons.
+(define_insn_and_split "*cbranch_div0s"
+ [(set (pc)
+ (if_then_else (lt (xor:SI (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG)
+ (lt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 0)))
+ (set (pc)
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+(define_insn_and_split "*cbranch_div0s"
+ [(set (pc)
+ (if_then_else (ge (xor:SI (match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" ""))
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG)
+ (lt:SI (xor:SI (match_dup 0) (match_dup 1)) (const_int 0)))
+ (set (pc)
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+;; Conditional move combine pattern for div0s comparisons.
+;; This is used when TARGET_PRETEND_CMOVE is in effect.
+(define_insn_and_split "*movsicc_div0s"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (if_then_else:SI (ge (xor:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" ""))
+ (const_int 0))
+ (match_operand:SI 3 "arith_reg_operand" "")
+ (match_operand:SI 4 "general_movsrc_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_PRETEND_CMOVE"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (match_dup 4)
+ (match_dup 3)))])
+
+(define_insn_and_split "*movsicc_div0s"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (if_then_else:SI (eq (lshiftrt:SI
+ (match_operand:SI 1 "arith_reg_operand")
+ (const_int 31))
+ (lshiftrt:SI
+ (match_operand:SI 2 "arith_reg_operand")
+ (const_int 31)))
+ (match_operand:SI 3 "arith_reg_operand")
+ (match_operand:SI 4 "general_movsrc_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_PRETEND_CMOVE"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG) (lt:SI (xor:SI (match_dup 1) (match_dup 2))
+ (const_int 0)))
+ (set (match_dup 0)
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (match_dup 4)
+ (match_dup 3)))])
+
+;; -------------------------------------------------------------------------
+;; SImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+;; Usually comparisons of 'unsigned int >= 0' are optimized away completely.
+;; However, especially when optimizations are off (e.g. -O0) such comparisons
+;; might remain and we have to handle them. If the '>= 0' case wasn't
+;; handled here, something else would just load a '0' into the second operand
+;; and do the comparison. We can do slightly better by just setting the
+;; T bit to '1'.
+(define_insn_and_split "cmpgeusi_t"
+ [(set (reg:SI T_REG)
+ (geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_or_0_operand" "r")))]
+ "TARGET_SH1"
+ "cmp/hs %1,%0"
+ "&& satisfies_constraint_Z (operands[1])"
+ [(set (reg:SI T_REG) (const_int 1))]
+ ""
+ [(set_attr "type" "mt_group")])
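+
+;; Illustrative note (editorial): for unsigned operands 'x >= 0' is always
+;; true, so when operand 1 turns out to be the constant 0 the split above
+;; just sets the T bit to 1 instead of emitting a cmp/hs against a zero
+;; register.  In C terms (hypothetical helper):
+;;
+;;   int geu0 (unsigned int x) { return x >= 0u; }  /* always returns 1 */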
+
+(define_insn "cmpgtusi_t"
+ [(set (reg:SI T_REG)
+ (gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "cmp/hi %1,%0"
+ [(set_attr "type" "mt_group")])
+
+;; -------------------------------------------------------------------------
+;; DImode compare and branch
+;; -------------------------------------------------------------------------
+
+;; arith3 patterns don't work well with the sh4-300 branch prediction mechanism.
+;; Therefore, we aim to have a set of three branches that go straight to the
+;; destination, i.e. only one of them is taken at any one time.
+;; This mechanism should also be slightly better for the sh4-200.
+
+(define_expand "cbranchdi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "arith_operand" "")
+ (match_operand:DI 2 "arith_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_dup 4))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4 || TARGET_SH2 || TARGET_SHMEDIA"
+{
+ enum rtx_code comparison;
+
+ if (TARGET_SHMEDIA)
+ {
+ emit_jump_insn (gen_cbranchint4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+ else if (!TARGET_CBRANCHDI4)
+ {
+ sh_emit_compare_and_branch (operands, DImode);
+ DONE;
+ }
+ else
+ {
+ if (expand_cbranchdi4 (operands, LAST_AND_UNUSED_RTX_CODE))
+ DONE;
+
+ comparison = prepare_cbranch_operands (operands, DImode,
+ LAST_AND_UNUSED_RTX_CODE);
+ if (comparison != GET_CODE (operands[0]))
+ operands[0]
+ = gen_rtx_fmt_ee (comparison, VOIDmode, operands[1], operands[2]);
+ operands[4] = gen_rtx_SCRATCH (SImode);
+ }
+})
+
+(define_insn_and_split "cbranchdi4_i"
+ [(set (pc)
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(match_operand:DI 1 "arith_operand" "r,r")
+ (match_operand:DI 2 "arith_operand" "rN,I08")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_scratch:SI 4 "=X,&r"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_CBRANCHDI4"
+ "#"
+ "&& reload_completed"
+ [(pc)]
+{
+ if (!expand_cbranchdi4 (operands, GET_CODE (operands[0])))
+ FAIL;
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; DImode signed integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn ""
+ [(set (reg:SI T_REG)
+ (eq:SI (and:DI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_operand" "r"))
+ (const_int 0)))]
+ "TARGET_SH1"
+{
+ return output_branchy_insn (EQ, "tst\t%S1,%S0;bf\t%l9;tst\t%R1,%R0",
+ insn, operands);
+}
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_insn "cmpeqdi_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "N,r")))]
+ "TARGET_SH1"
+{
+ static const char* alt[] =
+ {
+ "tst %S0,%S0" "\n"
+ " bf 0f" "\n"
+ " tst %R0,%R0" "\n"
+ "0:",
+
+ "cmp/eq %S1,%S0" "\n"
+ " bf 0f" "\n"
+ " cmp/eq %R1,%R0" "\n"
+ "0:"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "length" "6")
+ (set_attr "type" "arith3b")])
+
+(define_split
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DI 0 "arith_reg_operand" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "")))]
+;; If we applied this split when not optimizing, it would only be
+;; applied during the machine-dependent reorg, when no new basic blocks
+;; may be created.
+ "TARGET_SH1 && reload_completed && optimize"
+ [(set (reg:SI T_REG) (eq:SI (match_dup 2) (match_dup 3)))
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 6))
+ (pc)))
+ (set (reg:SI T_REG) (eq:SI (match_dup 4) (match_dup 5)))
+ (match_dup 6)]
+{
+ operands[2] = gen_highpart (SImode, operands[0]);
+ operands[3] = operands[1] == const0_rtx
+ ? const0_rtx
+ : gen_highpart (SImode, operands[1]);
+ operands[4] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_lowpart (SImode, operands[1]);
+ operands[6] = gen_label_rtx ();
+})
+
+(define_insn "cmpgtdi_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+{
+ static const char* alt[] =
+ {
+ "cmp/eq %S1,%S0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " cmp/gt %S1,%S0" "\n"
+ " cmp/hi %R1,%R0" "\n"
+ "0:",
+
+ "tst %S0,%S0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " cmp/pl %S0" "\n"
+ " cmp/hi %S0,%R0" "\n"
+ "0:"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgedi_t"
+ [(set (reg:SI T_REG)
+ (ge:SI (match_operand:DI 0 "arith_reg_operand" "r,r")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "r,N")))]
+ "TARGET_SH2"
+{
+ static const char* alt[] =
+ {
+ "cmp/eq %S1,%S0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " cmp/ge %S1,%S0" "\n"
+ " cmp/hs %R1,%R0" "\n"
+ "0:",
+
+ "cmp/pz %S0"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "length" "8,2")
+ (set_attr "type" "arith3,mt_group")])
+
+;; -------------------------------------------------------------------------
+;; DImode unsigned integer comparisons
+;; -------------------------------------------------------------------------
+
+(define_insn "cmpgeudi_t"
+ [(set (reg:SI T_REG)
+ (geu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+{
+ return "cmp/eq %S1,%S0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " cmp/hs %S1,%S0" "\n"
+ " cmp/hs %R1,%R0" "\n"
+ "0:";
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
+
+(define_insn "cmpgtudi_t"
+ [(set (reg:SI T_REG)
+ (gtu:SI (match_operand:DI 0 "arith_reg_operand" "r")
+ (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+{
+ return "cmp/eq %S1,%S0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " cmp/hi %S1,%S0" "\n"
+ " cmp/hi %R1,%R0" "\n"
+ "0:";
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "arith3")])
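+
+;; Illustrative note (editorial): the two DImode patterns above decompose a
+;; 64-bit unsigned comparison into word-wise 32-bit compares, roughly
+;;
+;;   int gtu64 (unsigned int ah, unsigned int al,
+;;              unsigned int bh, unsigned int bl)
+;;   {
+;;     /* Compare high words first; only when they are equal does the
+;;        low-word compare decide the result.  */
+;;     return ah == bh ? al > bl : ah > bh;
+;;   }
+;;
+;; The delayed branch arranges that only one of the two cmp/hi results
+;; survives in the T bit.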
+
+(define_insn "cmpeqsi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SI 1 "logical_operand" "%r")
+ (match_operand:SI 2 "cmp_operand" "Nr")))]
+ "TARGET_SHMEDIA"
+ "cmpeq %1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpeqdi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:DI 1 "register_operand" "%r")
+ (match_operand:DI 2 "cmp_operand" "Nr")))]
+ "TARGET_SHMEDIA"
+ "cmpeq %1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtsi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:SI 1 "cmp_operand" "Nr")
+ (match_operand:SI 2 "cmp_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgt %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtdi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgt %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtusi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gtu:SI (match_operand:SI 1 "cmp_operand" "Nr")
+ (match_operand:SI 2 "cmp_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgtu %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+(define_insn "cmpgtudi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gtu:SI (match_operand:DI 1 "arith_reg_or_0_operand" "Nr")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")))]
+ "TARGET_SHMEDIA"
+ "cmpgtu %N1, %N2, %0"
+ [(set_attr "type" "cmp_media")])
+
+; This pattern is for combine.
+(define_insn "*cmpne0sisi_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ne:SI (match_operand:SI 1 "arith_reg_operand" "r") (const_int 0)))]
+ "TARGET_SHMEDIA"
+ "cmpgtu %1,r63,%0"
+ [(set_attr "type" "cmp_media")])
+
+;; -------------------------------------------------------------------------
+;; Conditional move instructions
+;; -------------------------------------------------------------------------
+
+;; The insn names may seem reversed, but note that cmveq performs the move
+;; if op1 == 0, and cmvne does it if op1 != 0.
+
+(define_insn "movdicc_false"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (if_then_else:DI (eq (match_operand:DI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmveq %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "movdicc_true"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (if_then_else:DI (ne (match_operand:DI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmvne %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
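+
+;; Illustrative note (editorial): in C terms the two insns above behave like
+;; the following hypothetical helpers:
+;;
+;;   /* cmveq: dst = (cond == 0) ? src : dst;
+;;      cmvne: dst = (cond != 0) ? src : dst;  */
+;;   long long cmveq (long long cond, long long src, long long dst)
+;;   {
+;;     return cond == 0 ? src : dst;
+;;   }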
+
+(define_peephole2
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (if_then_else:DI (match_operator 3 "equality_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_operand:DI 2 "arith_reg_dest" "")
+ (match_dup 0)))
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (if_then_else:DI (match_dup 3) (match_dup 0) (match_dup 2)))]
+{
+ operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])),
+ VOIDmode, operands[1], CONST0_RTX (DImode));
+})
+
+(define_peephole2
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" ""))
+ (set (match_operand:DI 2 "arith_reg_dest" "")
+ (if_then_else:DI (match_operator 4 "equality_comparison_operator"
+ [(match_operand:DI 3 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_dup 0)
+ (match_dup 2)))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (if_then_else:DI (match_dup 4) (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "register_operand" "")
+ (if_then_else:DI (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "register_operand" "")
+ (match_operand:DI 3 "register_operand" "")))]
+ "TARGET_SHMEDIA"
+{
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && GET_MODE (XEXP (operands[1], 0)) == DImode
+ && XEXP (operands[1], 1) == const0_rtx)
+ ;
+ else
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+
+ operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]),
+ GET_CODE (operands[1]),
+ XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ if (!operands[1])
+ FAIL;
+ }
+})
+
+;; Add SImode variants for cmveq / cmvne to compensate for not promoting
+;; SImode to DImode.
+(define_insn "movsicc_false"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (if_then_else:SI (eq (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmveq %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "movsicc_true"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (if_then_else:SI (ne (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 3 "arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA"
+ "cmvne %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (if_then_else:SI (match_operator 3 "equality_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_operand:SI 2 "arith_reg_dest" "")
+ (match_dup 0)))
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_dup 3) (match_dup 0) (match_dup 2)))]
+{
+ operands[3] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[3])),
+ VOIDmode, operands[1], CONST0_RTX (SImode));
+})
+
+(define_peephole2
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (match_operand:SI 1 "arith_reg_or_0_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (if_then_else:SI (match_operator 4 "equality_comparison_operator"
+ [(match_operand:SI 3 "arith_reg_operand" "")
+ (const_int 0)])
+ (match_dup 0)
+ (match_dup 2)))]
+ "TARGET_SHMEDIA && peep2_reg_dead_p (2, operands[0])
+ && (!REG_P (operands[1]) || GENERAL_REGISTER_P (REGNO (operands[1])))"
+ [(set (match_dup 2)
+ (if_then_else:SI (match_dup 4) (match_dup 1) (match_dup 2)))]
+{
+ replace_rtx (operands[4], operands[0], operands[1]);
+})
+
+(define_peephole2
+ [(set (match_operand 0 "any_register_operand" "")
+ (match_operand 1 "any_register_operand" ""))
+ (set (match_operand 2 "any_register_operand" "") (match_operand 3 "" ""))
+ (set (match_operand 4 "" "") (match_operand 5 "" ""))]
+ "(HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[2]))
+ <= HARD_REGNO_NREGS (REGNO (operands[0]), GET_MODE (operands[0])))
+ && peep2_reg_dead_p (3, operands[0]) && peep2_reg_dead_p (3, operands[2])
+ && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[0])
+ && ! FIND_REG_INC_NOTE (peep2_next_insn (2), operands[2])
+ && ! reg_overlap_mentioned_p (operands[0], operands[3])
+ && ! reg_overlap_mentioned_p (operands[2], operands[0])
+ && ! reg_overlap_mentioned_p (operands[0], operands[1])
+ && (REGNO_REG_CLASS (REGNO (operands[0]))
+ == REGNO_REG_CLASS (REGNO (operands[2])))
+ && (REGNO_REG_CLASS (REGNO (operands[1]))
+ == REGNO_REG_CLASS (REGNO (operands[0])))"
+ [(set (match_dup 0) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ rtx set1, set2, insn2;
+ rtx replacements[4];
+
+ /* We want to replace occurrences of operands[0] with operands[1] and
+ operands[2] with operands[0] in operands[4]/operands[5].
+ Doing just two replace_rtx calls naively would result in the second
+ replacement undoing all that the first did if operands[1] and operands[2]
+ are identical, so we must do this simultaneously. */
+ replacements[0] = operands[0];
+ replacements[1] = operands[1];
+ replacements[2] = operands[2];
+ replacements[3] = operands[0];
+ if (!replace_n_hard_rtx (operands[5], replacements, 2, 0)
+ || !replace_n_hard_rtx (operands[4], replacements, 2, 0)
+ || !replace_n_hard_rtx (operands[2], replacements, 2, 0))
+ FAIL;
+
+ operands[5] = replace_n_hard_rtx (operands[5], replacements, 2, 1);
+ replace_n_hard_rtx (operands[4], replacements, 2, 1);
+ operands[2] = replace_n_hard_rtx (operands[2], replacements, 2, 1);
+ /* The operands array is aliased to recog_data.operand, which gets
+ clobbered by extract_insn, so finish with it now. */
+ set1 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
+ set2 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
+ /* ??? The last insn might be a jump insn, but the generic peephole2 code
+ always uses emit_insn. */
+ /* Check that we don't violate matching constraints or earlyclobbers. */
+ extract_insn (emit_insn (set1));
+ if (! constrain_operands (1))
+ goto failure;
+ insn2 = emit (set2);
+ if (GET_CODE (insn2) == BARRIER)
+ goto failure;
+ extract_insn (insn2);
+ if (! constrain_operands (1))
+ {
+ rtx tmp;
+ failure:
+ tmp = replacements[0];
+ replacements[0] = replacements[1];
+ replacements[1] = tmp;
+ tmp = replacements[2];
+ replacements[2] = replacements[3];
+ replacements[3] = tmp;
+ replace_n_hard_rtx (SET_DEST (set1), replacements, 2, 1);
+ replace_n_hard_rtx (SET_DEST (set2), replacements, 2, 1);
+ replace_n_hard_rtx (SET_SRC (set2), replacements, 2, 1);
+ FAIL;
+ }
+ DONE;
+})
+
+;; The register allocator is rather clumsy in handling multi-way conditional
+;; moves, so we allow the combiner to make them and split them up after
+;; reload.
+(define_insn_and_split "*movsicc_umin"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=&r")
+ (umin:SI (if_then_else:SI
+ (eq (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 0))
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 3 "register_operand" "0"))
+ (match_operand:SI 4 "arith_reg_or_0_operand" "r")))
+ (clobber (match_scratch:SI 5 "=&r"))]
+ "TARGET_SHMEDIA && !can_create_pseudo_p ()"
+ "#"
+ "TARGET_SHMEDIA && reload_completed"
+ [(pc)]
+{
+ emit_insn (gen_movsicc_false (operands[0], operands[1], operands[2],
+ operands[3]));
+ emit_insn (gen_cmpgtusi_media (operands[5], operands[4], operands[0]));
+ emit_insn (gen_movsicc_false (operands[0], operands[5], operands[4],
+ operands[0]));
+ DONE;
+})
+
+(define_insn "*movsicc_t_false"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (match_operand:SI 1 "general_movsrc_operand" "r,I08")
+ (match_operand:SI 2 "arith_reg_operand" "0,0")))]
+ "TARGET_PRETEND_CMOVE
+ && (arith_reg_operand (operands[1], SImode)
+ || (immediate_operand (operands[1], SImode)
+ && satisfies_constraint_I08 (operands[1])))"
+{
+ return "bt 0f" "\n"
+ " mov %1,%0" "\n"
+ "0:";
+}
+ [(set_attr "type" "mt_group,arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_insn "*movsicc_t_true"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (match_operand:SI 1 "general_movsrc_operand" "r,I08")
+ (match_operand:SI 2 "arith_reg_operand" "0,0")))]
+ "TARGET_PRETEND_CMOVE
+ && (arith_reg_operand (operands[1], SImode)
+ || (immediate_operand (operands[1], SImode)
+ && satisfies_constraint_I08 (operands[1])))"
+{
+ return "bf 0f" "\n"
+ " mov %1,%0" "\n"
+ "0:";
+}
+ [(set_attr "type" "mt_group,arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_expand "movsicc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (if_then_else:SI (match_operand 1 "comparison_operator" "")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "")
+ (match_operand:SI 3 "arith_reg_operand" "")))]
+ "TARGET_SHMEDIA || TARGET_PRETEND_CMOVE"
+{
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && GET_MODE (XEXP (operands[1], 0)) == SImode
+ && (TARGET_SHMEDIA
+ || (REG_P (XEXP (operands[1], 0))
+ && REGNO (XEXP (operands[1], 0)) == T_REG))
+ && XEXP (operands[1], 1) == const0_rtx)
+ ;
+
+ else if (TARGET_PRETEND_CMOVE)
+ {
+ enum rtx_code code = GET_CODE (operands[1]);
+ enum rtx_code new_code = code;
+ rtx op0 = XEXP (operands[1], 0);
+ rtx op1 = XEXP (operands[1], 1);
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+ switch (code)
+ {
+ case LT: case LE: case LEU: case LTU:
+ if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_INT)
+ break;
+ case NE:
+ new_code = reverse_condition (code);
+ break;
+ case EQ: case GT: case GE: case GEU: case GTU:
+ break;
+ default:
+ FAIL;
+ }
+ sh_emit_scc_to_t (new_code, op0, op1);
+ operands[1] = gen_rtx_fmt_ee (new_code == code ? NE : EQ, VOIDmode,
+ gen_rtx_REG (SImode, T_REG), const0_rtx);
+ }
+ else
+ {
+ if (!can_create_pseudo_p ())
+ FAIL;
+
+ operands[1] = sh_emit_cheap_store_flag (GET_MODE (operands[0]),
+ GET_CODE (operands[1]),
+ XEXP (operands[1], 0),
+ XEXP (operands[1], 1));
+ if (!operands[1])
+ FAIL;
+ }
+})
+
+(define_expand "movqicc"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (if_then_else:QI (match_operand 1 "comparison_operator" "")
+ (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 3 "register_operand" "")))]
+ "TARGET_SHMEDIA"
+{
+ operands[0] = simplify_gen_subreg (SImode, operands[0], QImode, 0);
+ operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+ operands[3] = simplify_gen_subreg (SImode, operands[3], QImode, 0);
+ emit (gen_movsicc (operands[0], operands[1], operands[2], operands[3]));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Addition instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "arith_reg_operand")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand")
+ (match_operand:DI 2 "arith_operand")))]
+ ""
+{
+ if (TARGET_SH1)
+ {
+ operands[2] = force_reg (DImode, operands[2]);
+ emit_insn (gen_adddi3_compact (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*adddi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "arith_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ add %1, %2, %0
+ addi %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*adddisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r,r") 0)
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "arith_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, %2, %0
+ addi.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "adddi3z_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "extend_reg_operand" "r")
+ (match_operand:SI 2 "extend_reg_or_0_operand" "rN"))))]
+ "TARGET_SHMEDIA"
+ "addz.l %1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn_and_split "adddi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand")
+ (match_operand:DI 2 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ emit_insn (gen_clrt ());
+ emit_insn (gen_addc (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_addc (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1]),
+ gen_highpart (SImode, operands[2])));
+ DONE;
+})
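+
+;; Illustrative note (editorial): the split above builds a 64-bit add out of
+;; the 32-bit addc insn defined below.  A hypothetical C sketch:
+;;
+;;   void add64 (unsigned int *rh, unsigned int *rl,
+;;               unsigned int ah, unsigned int al,
+;;               unsigned int bh, unsigned int bl)
+;;   {
+;;     unsigned int lo = al + bl;       /* first addc, with T cleared */
+;;     unsigned int carry = lo < al;    /* carry out -> T bit */
+;;     *rl = lo;
+;;     *rh = ah + bh + carry;           /* second addc consumes T */
+;;   }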
+
+(define_insn "addc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI T_REG)))
+ (set (reg:SI T_REG)
+ (ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
+ "TARGET_SH1"
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of the addc insn, where the exact value of the
+;; T bit doesn't matter. This is easier for combine to pick up.
+;; We allow a reg or 0 for one of the operands in order to be able to
+;; do 'reg + T' sequences. Reload will load the constant 0 into the reg
+;; as needed.
+(define_insn "*addc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "r"))
+ (match_operand:SI 3 "t_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "addc %2,%0"
+ [(set_attr "type" "arith")])
+
+;; Split 'reg + reg + 1' into a sett addc sequence, as it can be scheduled
+;; better, if the sett insn can be done early.
+(define_insn_and_split "*addc_r_r_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (plus:SI (plus:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" ""))
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG) (const_int 1))
+ (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 2))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])])
+
+;; Left shifts by one are usually done with an add insn to avoid T_REG
+;; clobbers. Thus addc can also be used to do something like '(x << 1) + 1'.
+(define_insn_and_split "*addc_2r_1"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 2))
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG) (const_int 1))
+ (parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 1) (match_dup 1))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])])
+
+;; Sometimes combine will try to do 'reg + (0-reg) + 1' if the *addc pattern
+;; matched. Split this up into a simple sub add sequence, as this will save
+;; us one sett insn.
+(define_insn_and_split "*minus_plus_one"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (plus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" ""))
+ (const_int 1)))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))])
+
+;; Split 'reg + T' into 'reg + 0 + T' to utilize the addc insn.
+;; If the 0 constant can be CSE-ed, this becomes a one instruction
+;; operation, as opposed to sequences such as
+;; movt r2
+;; add r2,r3
+;;
+;; Even if the constant is not CSE-ed, a sequence such as
+;; mov #0,r2
+;; addc r2,r3
+;; can be scheduled much better since the load of the constant can be
+;; done earlier, before any comparison insns that store the result in
+;; the T bit.
+(define_insn_and_split "*addc_r_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (plus:SI (match_operand:SI 1 "t_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (plus:SI (plus:SI (match_dup 2) (const_int 0))
+ (match_dup 1)))
+ (clobber (reg:SI T_REG))])])
+
+;; Use shlr-addc to do 'reg + (reg & 1)'.
+(define_insn_and_split "*addc_r_lsb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 1))
+ (match_operand:SI 2 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0) (plus:SI (reg:SI T_REG) (match_dup 2)))
+ (clobber (reg:SI T_REG))])]
+{
+ emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[1]));
+})
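+
+;; Illustrative note (editorial): shlr shifts operand 1 right by one and
+;; moves its least significant bit into the T bit, so the split above
+;; computes, roughly,
+;;
+;;   unsigned int add_lsb (unsigned int x, unsigned int y)
+;;   {
+;;     return y + (x & 1);   /* shlr moves bit 0 of x into T, addc adds T */
+;;   }
+;;
+;; The shifted copy of operand 1 lands in a fresh pseudo and is otherwise
+;; unused.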
+
+;; Use shlr-addc to do 'reg + reg + (reg & 1)'.
+(define_insn_and_split "*addc_r_r_lsb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 1))
+ (match_operand:SI 2 "arith_reg_operand"))
+ (match_operand:SI 3 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])]
+{
+ emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[1]));
+})
+
+;; Canonicalize 'reg + (reg & 1) + reg' into 'reg + reg + (reg & 1)'.
+(define_insn_and_split "*addc_r_lsb_r"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 1))
+ (plus:SI (match_operand:SI 2 "arith_reg_operand")
+ (match_operand:SI 3 "arith_reg_operand"))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (plus:SI (plus:SI (and:SI (match_dup 1) (const_int 1))
+ (match_dup 2))
+ (match_dup 3)))
+ (clobber (reg:SI T_REG))])])
+
+;; Canonicalize '2 * reg + (reg & 1)' into 'reg + reg + (reg & 1)'.
+(define_insn_and_split "*addc_2r_lsb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (and:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 1))
+ (mult:SI (match_operand:SI 2 "arith_reg_operand")
+ (const_int 2))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (plus:SI (plus:SI (and:SI (match_dup 1) (const_int 1))
+ (match_dup 2))
+ (match_dup 2)))
+ (clobber (reg:SI T_REG))])])
+
+;; Use shll-addc to do 'reg + ((unsigned int)reg >> 31)'.
+(define_insn_and_split "*addc_r_msb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 31))
+ (match_operand:SI 2 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0) (plus:SI (reg:SI T_REG) (match_dup 2)))
+ (clobber (reg:SI T_REG))])]
+{
+ emit_insn (gen_shll (gen_reg_rtx (SImode), operands[1]));
+})
+
+;; Use shll-addc to do 'reg + reg + ((unsigned int)reg >> 31)'.
+(define_insn_and_split "*addc_r_r_msb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (plus:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 31))
+ (match_operand:SI 2 "arith_reg_operand"))
+ (match_operand:SI 3 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0) (plus:SI (plus:SI (match_dup 2) (match_dup 3))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])]
+{
+ emit_insn (gen_shll (gen_reg_rtx (SImode), operands[1]));
+})
+
+;; Canonicalize '2 * reg + ((unsigned int)reg >> 31)'
+;; into 'reg + reg + ((unsigned int)reg >> 31)'.
+(define_insn_and_split "*addc_2r_msb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (mult:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int 2))
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand")
+ (const_int 31))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (plus:SI (plus:SI (lshiftrt:SI (match_dup 2) (const_int 31))
+ (match_dup 1))
+ (match_dup 1)))
+ (clobber (reg:SI T_REG))])])
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (plus:SI (match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_operand" "")))]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ operands[1] = force_reg (SImode, operands[1]);
+})
+
+(define_insn "addsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (plus:SI (match_operand:SI 1 "extend_reg_operand" "%r,r")
+ (match_operand:SI 2 "arith_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, %2, %0
+ addi.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "addsidi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:DI (plus:SI (match_operand:SI 1 "extend_reg_operand"
+ "%r,r")
+ (match_operand:SI 2 "arith_operand"
+ "r,I10"))))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, %2, %0
+ addi.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "*addsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (match_operand:SI 1 "arith_operand" "%0")
+ (match_operand:SI 2 "arith_operand" "rI08")))]
+ "TARGET_SH1"
+ "add %2,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Subtraction instructions
+;; -------------------------------------------------------------------------
+
+(define_expand "subdi3"
+ [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "")
+ (match_operand:DI 2 "arith_reg_operand" "")))]
+ ""
+{
+ if (TARGET_SH1)
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ emit_insn (gen_subdi3_compact (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "*subdi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "sub %N1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "subdisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (minus:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "sub.l %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn_and_split "subdi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest")
+ (minus:DI (match_operand:DI 1 "arith_reg_operand")
+ (match_operand:DI 2 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ emit_insn (gen_clrt ());
+ emit_insn (gen_subc (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2])));
+ emit_insn (gen_subc (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1]),
+ gen_highpart (SImode, operands[2])));
+ DONE;
+})
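+
+;; Illustrative note (editorial): analogous to adddi3_compact, the split
+;; above builds a 64-bit subtract from two 32-bit subc insns.  A hypothetical
+;; C sketch:
+;;
+;;   void sub64 (unsigned int *rh, unsigned int *rl,
+;;               unsigned int ah, unsigned int al,
+;;               unsigned int bh, unsigned int bl)
+;;   {
+;;     unsigned int borrow = al < bl;   /* borrow out of the low word -> T */
+;;     *rl = al - bl;                   /* first subc, with T cleared */
+;;     *rh = ah - bh - borrow;          /* second subc consumes T */
+;;   }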
+
+(define_insn "subc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r"))
+ (reg:SI T_REG)))
+ (set (reg:SI T_REG)
+ (gtu:SI (minus:SI (minus:SI (match_dup 1) (match_dup 2))
+ (reg:SI T_REG))
+ (match_dup 1)))]
+ "TARGET_SH1"
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of the subc insn, where the exact value of the
+;; T bit doesn't matter. This is easier for combine to pick up.
+;; We allow a reg or 0 for one of the operands in order to be able to
+;; do 'reg - T' sequences. Reload will load the constant 0 into the reg
+;; as needed.
+(define_insn "*subc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "r"))
+ (match_operand:SI 3 "t_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "subc %2,%0"
+ [(set_attr "type" "arith")])
+
+;; Split reg - reg - 1 into a sett subc sequence, as it can be scheduled
+;; better, if the sett insn can be done early.
+;; Notice that combine turns 'a - b - 1' into 'a + (~b)'.
+(define_insn_and_split "*subc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (plus:SI (not:SI (match_operand:SI 1 "arith_reg_operand" ""))
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (reg:SI T_REG) (const_int 1))
+ (parallel [(set (match_dup 0)
+ (minus:SI (minus:SI (match_dup 2) (match_dup 1))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])])
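+
+;; Illustrative note (editorial): 'a - b - 1' and 'a + (~b)' are the same
+;; value in two's complement arithmetic, since ~b == -b - 1:
+;;
+;;   unsigned int sub_minus_one (unsigned int a, unsigned int b)
+;;   {
+;;     return a + ~b;          /* == a - b - 1 */
+;;   }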
+
+;; Split 'reg - T' into 'reg - 0 - T' to utilize the subc insn.
+;; If the 0 constant can be CSE-ed, this becomes a one instruction
+;; operation, as opposed to sequences such as
+;; movt r2
+;; sub r2,r3
+;;
+;; Even if the constant is not CSE-ed, a sequence such as
+;; mov #0,r2
+;; subc r2,r3
+;; can be scheduled much better since the load of the constant can be
+;; done earlier, before any comparison insns that store the result in
+;; the T bit.
+(define_insn_and_split "*subc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (minus:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "t_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (minus:SI (minus:SI (match_dup 1) (const_int 0))
+ (match_dup 2)))
+ (clobber (reg:SI T_REG))])])
+
+(define_insn "*subsi3_internal"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "sub %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "*subsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (match_operand:SI 1 "minuend_operand" "rN")
+ (match_operand:SI 2 "extend_reg_operand" "r")))]
+ "TARGET_SHMEDIA
+ && (operands[1] != constm1_rtx
+ || (GET_CODE (operands[2]) != TRUNCATE
+ && GET_CODE (operands[2]) != SUBREG))"
+ "sub.l %N1, %2, %0"
+ "operands[1] == constm1_rtx"
+ [(set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))]
+ ""
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1
+ "general_extend_operand"
+ "") 0)) 0)))]
+ "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN"
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))]
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (zero_extend:SI (subreg:QI (not:SI (subreg:SI (match_operand:QI 1
+ "general_extend_operand"
+ "") 0)) 3)))]
+ "TARGET_SHMEDIA && TARGET_BIG_ENDIAN"
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))
+ (set (match_dup 0) (xor:SI (match_dup 0) (const_int 255)))]
+ "")
+
+;; Convert
+;; constant - reg
+;; to
+;; neg reg
+;; add reg, #const
+;; since this will sometimes save one instruction.
+;; Otherwise we might get a sequence like
+;; mov #const, rY
+;; sub rY, rX
+;; mov rX, rY
+;; if the source and dest regs are the same.
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (minus:SI (match_operand:SI 1 "arith_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))]
+ ""
+{
+ if (TARGET_SH1 && CONST_INT_P (operands[1]))
+ {
+ emit_insn (gen_negsi2 (operands[0], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_SHMEDIA)
+ {
+ if (!can_create_pseudo_p ()
+ && ! arith_reg_or_0_operand (operands[1], SImode))
+ FAIL;
+ if (operands[1] != const0_rtx && GET_CODE (operands[1]) != SUBREG)
+ operands[1] = force_reg (SImode, operands[1]);
+ }
+})
+
+;; -------------------------------------------------------------------------
+;; Division instructions
+;; -------------------------------------------------------------------------
+
+;; We take advantage of the library routines which don't clobber as many
+;; registers as a normal function call would.
+
+;; The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+;; also has an effect on the register that holds the address of the sfunc.
+;; To make this work, we have an extra dummy insn that shows the use
+;; of this register for reorg.
+
+(define_insn "use_sfunc_addr"
+ [(set (reg:SI PR_REG)
+ (unspec:SI [(match_operand:SI 0 "register_operand" "r")] UNSPEC_SFUNC))]
+ "TARGET_SH1 && check_use_sfunc_addr (insn, operands[0])"
+ ""
+ [(set_attr "length" "0")])
+
+(define_insn "udivsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (udiv:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "divu %2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "in_delay_slot" "no")])
+
+;; We must use a pseudo-reg forced to reg 0 in the SET_DEST rather than
+;; hard register 0. If we used hard register 0, then the next instruction
+;; would be a move from hard register 0 to a pseudo-reg. If the pseudo-reg
+;; gets allocated to a stack slot that needs its address reloaded, then
+;; there is nothing to prevent reload from using r0 to reload the address.
+;; This reload would clobber the value in r0 we are trying to store.
+;; If we let reload allocate r0, then this problem can never happen.
+(define_insn "udivsi3_i1"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R4_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+; Since shmedia-nofpu code could be linked against shcompact code, and
+; the udivsi3 libcall has the same name, we must consider all registers
+; clobbered that are in the union of the registers clobbered by the
+; shmedia and the shcompact implementation. Note that if the shcompact
+; implementation actually used shcompact code, we'd also need to clobber
+; r23 and fr23.
+(define_insn "udivsi3_i1_media"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R20_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI R22_REG))
+ (clobber (reg:DI TR0_REG))
+ (clobber (reg:DI TR1_REG))
+ (clobber (reg:DI TR2_REG))
+ (use (match_operand 1 "target_reg_operand" "b"))]
+ "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+ "blink %1, r18"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "udivsi3_i4_media"
+ [(set (match_dup 3)
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "")))
+ (set (match_dup 4)
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "")))
+ (set (match_dup 5) (float:DF (match_dup 3)))
+ (set (match_dup 6) (float:DF (match_dup 4)))
+ (set (match_dup 7) (div:DF (match_dup 5) (match_dup 6)))
+ (set (match_dup 8) (fix:DI (match_dup 7)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (truncate:SI (match_dup 8)))]
+ "TARGET_SHMEDIA_FPU"
+{
+ operands[3] = gen_reg_rtx (DImode);
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DFmode);
+ operands[6] = gen_reg_rtx (DFmode);
+ operands[7] = gen_reg_rtx (DFmode);
+ operands[8] = gen_reg_rtx (DImode);
+})
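+
+;; Illustrative note (editorial): the expander above performs the unsigned
+;; 32-bit division in double-precision floating point.  Assuming a non-zero
+;; divisor, a hypothetical C equivalent is
+;;
+;;   unsigned int udiv_fp (unsigned int a, unsigned int b)
+;;   {
+;;     /* Zero-extend, convert to double, divide, truncate back.  */
+;;     return (unsigned int) (long long) ((double) a / (double) b);
+;;   }
+;;
+;; Since both operands are below 2^53, the rounded quotient can never cross
+;; an integer boundary, so truncation yields the exact integer quotient.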
+
+(define_insn "udivsi3_i4"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (clobber (reg:DF DR4_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (use (reg:PSI FPSCR_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "fp_mode" "double")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "udivsi3_i4_single"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (clobber (reg:DF DR4_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
+ && TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "udivsi3_i4_int"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+
+(define_expand "udivsi3"
+ [(set (match_dup 3) (symbol_ref:SI "__udivsi3"))
+ (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (udiv:SI (reg:SI R4_REG)
+ (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R4_REG))
+ (use (match_dup 3))])]
+ ""
+{
+ rtx last;
+
+ operands[3] = gen_reg_rtx (Pmode);
+ /* Emit the move of the address to a pseudo outside of the libcall. */
+ if (TARGET_DIVIDE_CALL_TABLE)
+ {
+ /* libgcc2:__udivmoddi4 is not supposed to use an actual division, since
+ that causes problems when the divide code is supposed to come from a
+ separate library. Division by zero is undefined, so dividing 1 can be
+ implemented by comparing with the divisor. */
+ if (operands[1] == const1_rtx && currently_expanding_to_rtl)
+ {
+ rtx test = gen_rtx_GEU (VOIDmode, operands[1], operands[2]);
+ emit_insn (gen_cstoresi4 (operands[0], test,
+ operands[1], operands[2]));
+ DONE;
+ }
+ else if (operands[2] == const0_rtx)
+ {
+ emit_move_insn (operands[0], operands[2]);
+ DONE;
+ }
+ function_symbol (operands[3], "__udivsi3_i4i", SFUNC_GOT);
+ last = gen_udivsi3_i4_int (operands[0], operands[3]);
+ }
+ else if (TARGET_DIVIDE_CALL_FP)
+ {
+ function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC);
+ if (TARGET_FPU_SINGLE)
+ last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_udivsi3_i4 (operands[0], operands[3]);
+ }
+ else if (TARGET_SHMEDIA_FPU)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_udivsi3_i4_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_SH2A)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_udivsi3_sh2a (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_SH5)
+ {
+ function_symbol (operands[3],
+ TARGET_FPU_ANY ? "__udivsi3_i4" : "__udivsi3",
+ SFUNC_STATIC);
+
+ if (TARGET_SHMEDIA)
+ last = gen_udivsi3_i1_media (operands[0], operands[3]);
+ else if (TARGET_FPU_ANY)
+ last = gen_udivsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_udivsi3_i1 (operands[0], operands[3]);
+ }
+ else
+ {
+ function_symbol (operands[3], "__udivsi3", SFUNC_STATIC);
+ last = gen_udivsi3_i1 (operands[0], operands[3]);
+ }
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+ emit_insn (last);
+ DONE;
+})
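+
+;; Illustrative note (editorial): the 'operands[1] == const1_rtx' special
+;; case in the expander above relies on division by zero being undefined,
+;; so for a non-zero divisor
+;;
+;;   unsigned int udiv_of_one (unsigned int b)
+;;   {
+;;     return 1u / b;          /* == (1u >= b), i.e. 1 iff b == 1 */
+;;   }
+;;
+;; which is why a single unsigned compare (cstoresi4 with GEU) suffices.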
+
+(define_insn "divsi3_sh2a"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (div:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "divs %2,%1"
+ [(set_attr "type" "arith")
+ (set_attr "in_delay_slot" "no")])
+
+(define_insn "divsi3_i1"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i1_media"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R20_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (use (match_operand 1 "target_reg_operand" "b"))]
+ "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+ "blink %1, r18"
+ [(set_attr "type" "sfunc")])
+
+(define_insn "divsi3_media_2"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (use (reg:SI R20_REG))
+ (use (match_operand 1 "target_reg_operand" "b"))]
+ "TARGET_SHMEDIA && (! TARGET_SHMEDIA_FPU || ! TARGET_DIVIDE_FP)"
+ "blink %1, r18"
+ [(set_attr "type" "sfunc")])
+
+;; This pattern acts as a placeholder for -mdiv=inv:call to carry
+;; hard reg clobbers and data dependencies that we need when we want
+;; to rematerialize the division into a call.
+(define_insn_and_split "divsi_inv_call"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (clobber (reg:SI R20_REG))
+ (use (match_operand:SI 3 "register_operand" "r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& (reload_in_progress || reload_completed)"
+ [(set (match_dup 0) (match_dup 3))]
+ ""
+ [(set_attr "highpart" "must_split")])
+
+;; This is the combiner pattern for -mdiv=inv:call .
+(define_insn_and_split "*divsi_inv_call_combine"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI T_MEDIA_REG))
+ (clobber (reg:SI PR_MEDIA_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R21_REG))
+ (clobber (reg:SI TR0_REG))
+ (clobber (reg:SI R20_REG))
+ (use (unspec:SI [(match_dup 1)
+ (match_operand:SI 3 "" "")
+ (unspec:SI [(match_operand:SI 4 "" "")
+ (match_dup 3)
+ (match_operand:DI 5 "" "")]
+ UNSPEC_DIV_INV_M2)
+ (match_operand:DI 6 "" "")
+ (const_int 0)
+ (const_int 0)]
+ UNSPEC_DIV_INV_M3))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& (reload_in_progress || reload_completed)"
+ [(pc)]
+{
+ const char *name = sh_divsi3_libfunc;
+ enum sh_function_kind kind = SFUNC_GOT;
+ rtx sym;
+
+ emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, R5_REG), operands[2]);
+ while (TARGET_DIVIDE_INV_CALL2)
+ {
+ rtx x = operands[3];
+
+ if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_DIV_INV_M1)
+ break;
+ x = XVECEXP (x, 0, 0);
+ name = "__sdivsi3_2";
+ kind = SFUNC_STATIC;
+ emit_move_insn (gen_rtx_REG (DImode, R20_REG), x);
+ break;
+ }
+ sym = function_symbol (NULL, name, kind);
+ emit_insn (gen_divsi3_media_2 (operands[0], sym));
+ DONE;
+}
+ [(set_attr "highpart" "must_split")])
+
+(define_expand "divsi3_i4_media"
+ [(set (match_dup 3) (float:DF (match_operand:SI 1 "register_operand" "r")))
+ (set (match_dup 4) (float:DF (match_operand:SI 2 "register_operand" "r")))
+ (set (match_dup 5) (div:DF (match_dup 3) (match_dup 4)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (fix:SI (match_dup 5)))]
+ "TARGET_SHMEDIA_FPU"
+{
+ operands[3] = gen_reg_rtx (DFmode);
+ operands[4] = gen_reg_rtx (DFmode);
+ operands[5] = gen_reg_rtx (DFmode);
+})
+
+(define_insn "divsi3_i4"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (use (reg:PSI FPSCR_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "fp_mode" "double")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i4_single"
+ [(set (match_operand:SI 0 "register_operand" "=y")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:DF DR0_REG))
+ (clobber (reg:DF DR2_REG))
+ (clobber (reg:SI R2_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "(TARGET_FPU_SINGLE_ONLY || TARGET_FPU_DOUBLE || TARGET_SHCOMPACT)
+ && TARGET_FPU_SINGLE"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "divsi3_i4_int"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (div:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "divsi3"
+ [(set (match_dup 3) (symbol_ref:SI "__sdivsi3"))
+ (set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+ (parallel [(set (match_operand:SI 0 "register_operand" "")
+ (div:SI (reg:SI R4_REG)
+ (reg:SI R5_REG)))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))
+ (use (match_dup 3))])]
+ ""
+{
+ rtx last;
+
+ operands[3] = gen_reg_rtx (Pmode);
+ /* Emit the move of the address to a pseudo outside of the libcall. */
+ if (TARGET_DIVIDE_CALL_TABLE)
+ {
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+ last = gen_divsi3_i4_int (operands[0], operands[3]);
+ }
+ else if (TARGET_DIVIDE_CALL_FP)
+ {
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ if (TARGET_FPU_SINGLE)
+ last = gen_divsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_divsi3_i4 (operands[0], operands[3]);
+ }
+ else if (TARGET_SH2A)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_divsi3_sh2a (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_DIVIDE_INV)
+ {
+ rtx dividend = operands[1];
+ rtx divisor = operands[2];
+ rtx tab_base;
+ rtx nsb_res = gen_reg_rtx (DImode);
+ rtx norm64 = gen_reg_rtx (DImode);
+ rtx tab_ix = gen_reg_rtx (DImode);
+ rtx norm32 = gen_reg_rtx (SImode);
+ rtx i92 = force_reg (DImode, GEN_INT (92));
+ rtx scratch0a = gen_reg_rtx (DImode);
+ rtx scratch0b = gen_reg_rtx (DImode);
+ rtx inv0 = gen_reg_rtx (SImode);
+ rtx scratch1a = gen_reg_rtx (DImode);
+ rtx scratch1b = gen_reg_rtx (DImode);
+ rtx shift = gen_reg_rtx (DImode);
+ rtx i2p27, i43;
+ rtx inv1 = gen_reg_rtx (SImode);
+ rtx scratch2a = gen_reg_rtx (DImode);
+ rtx scratch2b = gen_reg_rtx (SImode);
+ rtx inv2 = gen_reg_rtx (SImode);
+ rtx scratch3a = gen_reg_rtx (DImode);
+ rtx scratch3b = gen_reg_rtx (DImode);
+ rtx scratch3c = gen_reg_rtx (DImode);
+ rtx scratch3d = gen_reg_rtx (SImode);
+ rtx scratch3e = gen_reg_rtx (DImode);
+ rtx result = gen_reg_rtx (SImode);
+
+ if (! arith_reg_or_0_operand (dividend, SImode))
+ dividend = force_reg (SImode, dividend);
+ if (! arith_reg_operand (divisor, SImode))
+ divisor = force_reg (SImode, divisor);
+ if (flag_pic && Pmode != DImode)
+ {
+ tab_base = gen_rtx_SYMBOL_REF (Pmode, "__div_table");
+ tab_base = gen_datalabel_ref (tab_base);
+ tab_base = force_reg (DImode, gen_rtx_SIGN_EXTEND (DImode, tab_base));
+ }
+ else
+ {
+ tab_base = gen_rtx_SYMBOL_REF (DImode, "__div_table");
+ tab_base = gen_datalabel_ref (tab_base);
+ tab_base = force_reg (DImode, tab_base);
+ }
+ if (TARGET_DIVIDE_INV20U)
+ i2p27 = force_reg (DImode, GEN_INT (-2 << 27));
+ else
+ i2p27 = GEN_INT (0);
+ if (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)
+ i43 = force_reg (DImode, GEN_INT (43));
+ else
+ i43 = GEN_INT (0);
+ emit_insn (gen_nsbdi (nsb_res,
+ simplify_gen_subreg (DImode, divisor, SImode, 0)));
+ emit_insn (gen_ashldi3_media (norm64,
+ gen_rtx_SUBREG (DImode, divisor, 0),
+ nsb_res));
+ emit_insn (gen_ashrdi3_media (tab_ix, norm64, GEN_INT (58)));
+ emit_insn (gen_ashrdisi3_media_high (norm32, norm64, GEN_INT (32)));
+ emit_insn (gen_divsi_inv_m1 (inv1, tab_base, tab_ix, norm32,
+ inv0, scratch0a, scratch0b,
+ scratch1a, scratch1b));
+ emit_insn (gen_subdi3 (shift, i92, nsb_res));
+ emit_insn (gen_divsi_inv_m2 (inv2, norm32, inv1, i92,
+ scratch2a));
+ emit_insn (gen_divsi_inv_m3 (result, dividend, inv1, inv2, shift,
+ i2p27, i43,
+ scratch3a, scratch3b, scratch3c,
+ scratch2a, scratch2b, scratch3d, scratch3e));
+ if (TARGET_DIVIDE_INV_CALL || TARGET_DIVIDE_INV_CALL2)
+ emit_insn (gen_divsi_inv_call (operands[0], dividend, divisor, result));
+ else if (TARGET_DIVIDE_INV_FP)
+ emit_insn (gen_divsi_inv_fp (operands[0], dividend, divisor, result,
+ gen_reg_rtx (SImode), gen_reg_rtx (SImode),
+ gen_reg_rtx (DFmode), gen_reg_rtx (DFmode),
+ gen_reg_rtx (DFmode)));
+ else
+ emit_move_insn (operands[0], result);
+ DONE;
+ }
+ else if (TARGET_SHMEDIA_FPU && TARGET_DIVIDE_FP)
+ {
+ operands[1] = force_reg (SImode, operands[1]);
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_divsi3_i4_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (TARGET_SH5)
+ {
+ if (TARGET_DIVIDE_CALL2)
+ {
+ rtx tab_base = gen_rtx_SYMBOL_REF (Pmode, "__div_table");
+ tab_base = gen_datalabel_ref (tab_base);
+ emit_move_insn (gen_rtx_REG (Pmode, R20_REG), tab_base);
+ }
+ if (TARGET_FPU_ANY && TARGET_SH1)
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_STATIC);
+ else if (TARGET_DIVIDE_CALL2)
+ function_symbol (operands[3], "__sdivsi3_2", SFUNC_STATIC);
+ else
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+
+ if (TARGET_SHMEDIA)
+ last = ((TARGET_DIVIDE_CALL2 ? gen_divsi3_media_2 : gen_divsi3_i1_media)
+ (operands[0], operands[3]));
+ else if (TARGET_FPU_ANY)
+ last = gen_divsi3_i4_single (operands[0], operands[3]);
+ else
+ last = gen_divsi3_i1 (operands[0], operands[3]);
+ }
+ else
+ {
+ function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT);
+ last = gen_divsi3_i1 (operands[0], operands[3]);
+ }
+ emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+ emit_insn (last);
+ DONE;
+})
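+
+;; As an illustration, every strategy chosen above implements a plain
+;; 32-bit signed division such as
+;;   int quot (int a, int b) { return a / b; }
+;; The TARGET_DIVIDE_* flags tested in the expander reflect the selected
+;; division strategy (normally chosen with the -mdiv= option), so the same
+;; source can end up as a call to __sdivsi3, a floating-point based
+;; sequence, or one of the divide-by-inverse sequences defined below.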
+
+;; operands: scratch, tab_base, tab_ix
+;; These are unspecs because we could generate an indexed addressing mode
+;; even if -m5-32media, where INDEX_REG_CLASS == NO_REGS, and this would
+;; confuse reload. See PR27117.
+(define_insn "divsi_inv_qitable"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI (unspec:QI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPEC_DIV_INV_TABLE)))]
+ "TARGET_SHMEDIA"
+ "ldx.ub %1, %2, %0"
+ [(set_attr "type" "load_media")
+ (set_attr "highpart" "user")])
+
+;; operands: scratch, tab_base, tab_ix
+(define_insn "divsi_inv_hitable"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI (unspec:HI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")]
+ UNSPEC_DIV_INV_TABLE)))]
+ "TARGET_SHMEDIA"
+ "ldx.w %1, %2, %0"
+ [(set_attr "type" "load_media")
+ (set_attr "highpart" "user")])
+
+;; operands: inv0, tab_base, tab_ix, norm32
+;; scratch equiv in sdivsi3_2: r19, r21
+(define_expand "divsi_inv_m0"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")]
+ UNSPEC_DIV_INV_M0))
+ (clobber (match_operand:DI 4 "register_operand" "=r"))
+ (clobber (match_operand:DI 5 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+{
+/*
+tab_base: r20
+tab_ix: r21
+norm32: r25
+ ldx.ub r20, r21, r19 // u0.8
+ shlli r21, 1, r21
+ muls.l r25, r19, r19 // s2.38
+ ldx.w r20, r21, r21 // s2.14
+ shari r19, 24, r19 // truncate to s2.14
+ sub r21, r19, r19 // some 11 bit inverse in s1.14
+*/
+
+ rtx inv0 = operands[0];
+ rtx tab_base = operands[1];
+ rtx tab_ix = operands[2];
+ rtx norm32 = operands[3];
+ rtx scratch0 = operands[4];
+ rtx scratch0_si = gen_lowpart (SImode, scratch0);
+ rtx scratch1 = operands[5];
+
+ emit_insn (gen_divsi_inv_qitable (scratch0, tab_base, tab_ix));
+ emit_insn (gen_ashldi3_media (scratch1, tab_ix, GEN_INT (1)));
+ emit_insn (gen_mulsidi3_media (scratch0, norm32, scratch0_si));
+ emit_insn (gen_divsi_inv_hitable (scratch1, tab_base, scratch1));
+ emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (24)));
+ emit_insn (gen_subdisi3_media (inv0, scratch1, scratch0));
+ DONE;
+})
+
+;; operands: inv1, tab_base, tab_ix, norm32
+(define_insn_and_split "divsi_inv_m1"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")]
+ UNSPEC_DIV_INV_M1))
+ (clobber (match_operand:SI 4 "register_operand" "=r"))
+ (clobber (match_operand:DI 5 "register_operand" "=r"))
+ (clobber (match_operand:DI 6 "register_operand" "=r"))
+ (clobber (match_operand:DI 7 "register_operand" "=r"))
+ (clobber (match_operand:DI 8 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+{
+/* inv0: r19
+ muls.l r19, r19, r18 // u0.28
+ muls.l r25, r18, r18 // s2.58
+ shlli r19, 45, r0 // multiply by two and convert to s2.58
+ sub r0, r18, r18
+ shari r18, 28, r18 // some 18 bit inverse in s1.30
+*/
+
+ rtx inv1 = operands[0];
+ rtx tab_base = operands[1];
+ rtx tab_ix = operands[2];
+ rtx norm32 = operands[3];
+ rtx inv0 = operands[4];
+ rtx inv0_di = simplify_gen_subreg (DImode, inv0, SImode, 0);
+ rtx scratch0a = operands[5];
+ rtx scratch0b = operands[6];
+ rtx scratch0 = operands[7];
+ rtx scratch1 = operands[8];
+ rtx scratch1_si = gen_lowpart (SImode, scratch1);
+
+ emit_insn (gen_divsi_inv_m0 (inv0, tab_base, tab_ix, norm32,
+ scratch0a, scratch0b));
+ emit_insn (gen_mulsidi3_media (scratch1, inv0, inv0));
+ emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si));
+ emit_insn (gen_ashldi3_media (scratch0, inv0_di, GEN_INT (45)));
+ emit_insn (gen_subdi3 (scratch1, scratch0, scratch1));
+ emit_insn (gen_ashrdisi3_media_opaque (inv1, scratch1, GEN_INT (28)));
+ DONE;
+})
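+
+;; As a rough sketch (ignoring the fixed-point scaling), the split body
+;; above performs one Newton-Raphson refinement of the reciprocal seed
+;; delivered by divsi_inv_m0:
+;;   inv1 = inv0 * (2 - norm32 * inv0)
+;; Each such step roughly doubles the number of accurate reciprocal bits,
+;; which matches the register comments: about 11 bits in, about 18 bits out.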
+
+;; operands: inv2, norm32, inv1, i92
+(define_insn_and_split "divsi_inv_m2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r")]
+ UNSPEC_DIV_INV_M2))
+ (clobber (match_operand:DI 4 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+{
+/*
+ muls.l r18, r25, r0 // s2.60
+ shari r0, 16, r0 // s-16.44
+ sub
+ muls.l r0, r18, r19 // s-16.74
+ shari r19, 30, r19 // s-16.44
+*/
+ rtx inv2 = operands[0];
+ rtx norm32 = operands[1];
+ rtx inv1 = operands[2];
+ rtx i92 = operands[3];
+ rtx scratch0 = operands[4];
+ rtx scratch0_si = gen_lowpart (SImode, scratch0);
+
+ emit_insn (gen_mulsidi3_media (scratch0, inv1, norm32));
+ emit_insn (gen_ashrdi3_media (scratch0, scratch0, GEN_INT (16)));
+ emit_insn (gen_subdi3 (scratch0, i92, scratch0));
+ emit_insn (gen_mulsidi3_media (scratch0, scratch0_si, inv1));
+ emit_insn (gen_ashrdisi3_media_opaque (inv2, scratch0, GEN_INT (30)));
+ DONE;
+})
+
+(define_insn_and_split "divsi_inv_m3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")
+ (match_operand:DI 4 "register_operand" "r")
+ (match_operand:DI 5 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 6 "arith_reg_or_0_operand" "rN")]
+ UNSPEC_DIV_INV_M3))
+ (clobber (match_operand:DI 7 "register_operand" "=r"))
+ (clobber (match_operand:DI 8 "register_operand" "=r"))
+ (clobber (match_operand:DI 9 "register_operand" "=r"))
+ (clobber (match_operand:DI 10 "register_operand" "=r"))
+ (clobber (match_operand:SI 11 "register_operand" "=r"))
+ (clobber (match_operand:SI 12 "register_operand" "=r"))
+ (clobber (match_operand:DI 13 "register_operand" "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+{
+/*
+ r0: result r1: shift r4: dividend r18: inv1 r19: inv2
+ r0: scratch0 r19: scratch1 r21: scratch2
+
+ muls.l r18, r4, r25 // s32.30
+ muls.l r19, r4, r19 // s15.30
+ shari r25, 63, r21
+ shari r19, 14, r19 // s18.-14
+ sub r25, r19, r0
+ shard r0, r1, r0
+ sub r0, r21, r0
+*/
+
+ rtx result = operands[0];
+ rtx dividend = operands[1];
+ rtx inv1 = operands[2];
+ rtx inv2 = operands[3];
+ rtx shift = operands[4];
+ rtx scratch0 = operands[7];
+ rtx scratch1 = operands[8];
+ rtx scratch2 = operands[9];
+
+ if (satisfies_constraint_N (dividend))
+ {
+ emit_move_insn (result, dividend);
+ DONE;
+ }
+
+ emit_insn (gen_mulsidi3_media (scratch0, inv1, dividend));
+ emit_insn (gen_mulsidi3_media (scratch1, inv2, dividend));
+ emit_insn (gen_ashrdi3_media (scratch2, scratch0, GEN_INT (63)));
+ emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (14)));
+ emit_insn (gen_adddi3 (scratch0, scratch0, scratch1));
+ emit_insn (gen_ashrdi3_media (scratch0, scratch0, shift));
+ emit_insn (gen_subdisi3_media (result, scratch0, scratch2));
+ DONE;
+})
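+
+;; As a rough C model of the split body above (scaling as in the register
+;; comments, names purely illustrative):
+;;   int64_t p = (int64_t) dividend * inv1 + (((int64_t) dividend * inv2) >> 14);
+;;   int32_t q = (int32_t) ((p >> shift) - (((int64_t) dividend * inv1) >> 63));
+;; The subtracted term is 0 or -1, so negative products get a +1 correction,
+;; adjusting the truncated quotient towards zero as C signed division requires.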
+
+;; operands: quotient, dividend, inv1, inv2, shift, i2p27, i43
+;; inv1: tab_base, tab_ix, norm32
+;; inv2: norm32, inv1, i92
+(define_insn_and_split "divsi_inv_m1_3"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (unspec:SI [(match_operand:DI 2 "register_operand" "r")
+ (match_operand:DI 3 "register_operand" "r")
+ (match_operand:SI 4 "register_operand" "r")]
+ UNSPEC_DIV_INV_M1)
+ (unspec:SI [(match_dup 4)
+ (unspec:SI [(match_dup 2)
+ (match_dup 3)
+ (match_dup 4)] UNSPEC_DIV_INV_M1)
+ (match_operand:SI 5 "" "")]
+ UNSPEC_DIV_INV_M2)
+ (match_operand:DI 6 "register_operand" "r")
+ (match_operand:DI 7 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 8 "arith_reg_or_0_operand" "rN")]
+ UNSPEC_DIV_INV_M3))
+ (clobber (match_operand:DI 9 "register_operand" "=r"))
+ (clobber (match_operand:DI 10 "register_operand" "=r"))
+ (clobber (match_operand:DI 11 "register_operand" "=r"))
+ (clobber (match_operand:DI 12 "register_operand" "=r"))
+ (clobber (match_operand:SI 13 "register_operand" "=r"))
+ (clobber (match_operand:SI 14 "register_operand" "=r"))
+ (clobber (match_operand:DI 15 "register_operand" "=r"))]
+ "TARGET_SHMEDIA
+ && (TARGET_DIVIDE_INV_MINLAT
+ || TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)"
+ "#"
+ "&& !can_create_pseudo_p ()"
+ [(pc)]
+{
+ rtx result = operands[0];
+ rtx dividend = operands[1];
+ rtx tab_base = operands[2];
+ rtx tab_ix = operands[3];
+ rtx norm32 = operands[4];
+ /* rtx i92 = operands[5]; */
+ rtx shift = operands[6];
+ rtx i2p27 = operands[7];
+ rtx i43 = operands[8];
+ rtx scratch0 = operands[9];
+ rtx scratch0_si = gen_lowpart (SImode, scratch0);
+ rtx scratch1 = operands[10];
+ rtx scratch1_si = gen_lowpart (SImode, scratch1);
+ rtx scratch2 = operands[11];
+ rtx scratch3 = operands[12];
+ rtx scratch4 = operands[13];
+ rtx scratch4_di = simplify_gen_subreg (DImode, scratch4, SImode, 0);
+ rtx scratch5 = operands[14];
+ rtx scratch5_di = simplify_gen_subreg (DImode, scratch5, SImode, 0);
+ rtx scratch6 = operands[15];
+
+ emit_insn (gen_divsi_inv_m0 (scratch4, tab_base, tab_ix, norm32,
+ scratch0, scratch1));
+ /* inv0 == scratch4 */
+ if (! TARGET_DIVIDE_INV20U)
+ {
+ emit_insn (gen_mulsidi3_media (scratch0, scratch4, scratch4));
+ i2p27 = scratch0;
+ emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch0_si));
+ }
+ else
+ {
+ emit_insn (gen_mulsidi3_media (scratch1, scratch4, scratch4));
+ emit_insn (gen_mulsidi3_media (scratch1, norm32, scratch1_si));
+ }
+ emit_insn (gen_ashldi3_media (scratch2, scratch4_di, GEN_INT (45)));
+ emit_insn (gen_subdi3 (scratch1, scratch2, scratch1));
+ emit_insn (gen_ashrdisi3_media_opaque (scratch4, scratch1, GEN_INT (28)));
+ /* inv1 == scratch4 */
+
+ if (TARGET_DIVIDE_INV_MINLAT)
+ {
+ emit_insn (gen_mulsidi3_media (scratch1, scratch4, norm32));
+ emit_insn (gen_mulsidi3_media (scratch2, dividend, scratch4));
+ emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (16)));
+ emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch4));
+ emit_insn (gen_ashrdi3_media (scratch3, scratch2, GEN_INT (63)));
+ emit_insn (gen_ashrsi3_media (scratch5, dividend, GEN_INT (14)));
+ emit_insn (gen_ashrdi3_media (scratch1, scratch1, GEN_INT (30)));
+ emit_insn (gen_mulsidi3_media (scratch1, scratch1_si, scratch5));
+ emit_insn (gen_xordi3 (scratch0, scratch3, i2p27));
+ emit_insn (gen_adddi3 (scratch2, scratch2, scratch0));
+ emit_insn (gen_subdi3 (scratch2, scratch2, scratch1));
+ }
+ else
+ {
+ rtx label = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
+ /* Use separate scratch regs for nsb and sign to allow scheduling. */
+ emit_insn (gen_nsbdi (scratch6,
+ simplify_gen_subreg (DImode, dividend, SImode, 0)));
+ emit_insn (gen_xorsi3 (scratch5, dividend, norm32));
+ emit_insn (gen_ashrdi3_media (scratch3, scratch5_di, GEN_INT (63)));
+ emit_insn (gen_divsi_inv20 (scratch2,
+ norm32, scratch4, dividend,
+ scratch6, scratch3, i43,
+ /* scratch0 may be shared with i2p27. */
+ scratch0, scratch1, scratch5,
+ label, label, i2p27));
+ }
+ emit_insn (gen_ashrdi3_media (scratch2, scratch2, shift));
+ emit_insn (gen_subdisi3_media (result, scratch2, scratch3));
+ DONE;
+})
+
+(define_insn "divsi_inv20"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (unspec:DI [(match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "register_operand" "r")
+ (match_operand:DI 4 "register_operand" "r")
+ (match_operand:DI 5 "register_operand" "r")
+ (match_operand:DI 6 "register_operand" "r")
+ (match_operand:DI 12 "register_operand" "r")
+ (match_operand 10 "target_operand" "b")
+ (match_operand 11 "immediate_operand" "i")]
+ UNSPEC_DIV_INV20))
+ (clobber (match_operand:DI 7 "register_operand" "=&r"))
+ (clobber (match_operand:DI 8 "register_operand" "=&r"))
+ (clobber (match_operand:SI 9 "register_operand" "=r"))]
+ "TARGET_SHMEDIA
+ && (TARGET_DIVIDE_INV20U || TARGET_DIVIDE_INV20L)"
+{
+/* operands: %0 div_result, %1 norm32, %2 inv1, %3 dividend,
+ %4 dividend_nsb, %5 result_sign, %6 i43, %12 i2p27,
+ %7 round_scratch, %8 scratch0 (di), %9 scratch1 (si)
+ %10 label (tr), %11 label (imm)
+
+ muls.l inv1, norm32, scratch0 // s2.60
+ muls.l inv1, dividend, result // s32.30
+ xor i2p27, result_sign, round_scratch
+ bge/u dividend_nsb, i43, tr.. (label)
+ shari scratch0, 16, scratch0 // s-16.44
+ muls.l scratch0_si, inv1, scratch0 // s-16.74
+ sub result, round_scratch, result
+ shari dividend, 14, scratch1 // s19.-14
+ shari scratch0, 30, scratch0 // s-16.44
+ muls.l scratch0, scratch1, round_scratch // s15.30
+label:
+ sub result, round_scratch, result */
+
+ const bool likely = TARGET_DIVIDE_INV20L;
+ if (likely)
+ return
+ "muls.l %2, %3, %0" "\n"
+ " xor %12, %5, %7" "\n"
+ " bge/l %4, %6, %10" "\n"
+ " muls.l %2, %1, %8" "\n"
+ " shari %8, 16, %8" "\n"
+ " muls.l %8, %2, %8" "\n"
+ " shari %3, 14, %9" "\n"
+ " shari %8, 30, %8" "\n"
+ " muls.l %8, %9, %8" "\n"
+ " sub %0, %8, %0" "\n"
+ "%11: add %0, %7, %0";
+ else
+ return
+ "muls.l %2, %1, %8" "\n"
+ " muls.l %2, %3, %0" "\n"
+ " xor %12, %5, %7" "\n"
+ " bge/u %4, %6, %10" "\n"
+ " shari %8, 16, %8" "\n"
+ " muls.l %8, %2, %8" "\n"
+ " sub %0, %7, %0" "\n"
+ " shari %3, 14, %9" "\n"
+ " shari %8, 30, %8" "\n"
+ " muls.l %8, %9, %7" "\n"
+ "%11: sub %0, %7, %0";
+})
+
+(define_insn_and_split "divsi_inv_fp"
+ [(set (match_operand:SI 0 "general_movdst_operand" "=rf")
+ (div:SI (match_operand:SI 1 "general_movsrc_operand" "rf")
+ (match_operand:SI 2 "register_operand" "rf")))
+ (use (match_operand:SI 3 "general_movsrc_operand" "r"))
+ (clobber (match_operand:SI 4 "register_operand" "=r"))
+ (clobber (match_operand:SI 5 "register_operand" "=r"))
+ (clobber (match_operand:DF 6 "register_operand" "=r"))
+ (clobber (match_operand:DF 7 "register_operand" "=r"))
+ (clobber (match_operand:DF 8 "register_operand" "=r"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& (reload_in_progress || reload_completed)"
+ [(set (match_dup 0) (match_dup 3))]
+ ""
+ [(set_attr "highpart" "must_split")])
+
+;; If a matching group of divide-by-inverse instructions is in the same
+;; basic block after gcse & loop optimizations, we want to transform them
+;; to a straight division using floating point for TARGET_DIVIDE_INV_FP.
+(define_insn_and_split "*divsi_inv_fp_combine"
+ [(set (match_operand:SI 0 "register_operand" "=f")
+ (div:SI (match_operand:SI 1 "register_operand" "f")
+ (match_operand:SI 2 "register_operand" "f")))
+ (use (unspec:SI [(match_dup 1)
+ (match_operand:SI 3 "" "")
+ (unspec:SI [(match_operand:SI 4 "" "")
+ (match_dup 3)
+ (match_operand:DI 5 "" "")] UNSPEC_DIV_INV_M2)
+ (match_operand:DI 6 "" "")
+ (const_int 0)
+ (const_int 0)] UNSPEC_DIV_INV_M3))
+ (clobber (match_operand:SI 7 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:SI 8 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:DF 9 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:DF 10 "fp_arith_reg_operand" ""))
+ (clobber (match_operand:DF 11 "fp_arith_reg_operand" ""))]
+ "TARGET_SHMEDIA_FPU && TARGET_DIVIDE_INV_FP && !can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 9) (float:DF (match_dup 1)))
+ (set (match_dup 10) (float:DF (match_dup 2)))
+ (set (match_dup 11) (div:DF (match_dup 9) (match_dup 10)))
+ (set (match_dup 8)
+ (fix:SI (match_dup 11)))
+ (set (match_dup 0) (match_dup 8))]
+{
+ if (! fp_arith_reg_operand (operands[1], SImode))
+ {
+ emit_move_insn (operands[7], operands[1]);
+ operands[1] = operands[7];
+ }
+ if (! fp_arith_reg_operand (operands[2], SImode))
+ {
+ emit_move_insn (operands[8], operands[2]);
+ operands[2] = operands[8];
+ }
+}
+ [(set_attr "highpart" "must_split")])
+
+;; -------------------------------------------------------------------------
+;; Multiplication instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "umulhisi3_i"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 0 "arith_reg_operand" "r"))
+ (zero_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" "r"))))]
+ "TARGET_SH1"
+ "mulu.w %1,%0"
+ [(set_attr "type" "smpy")])
+
+(define_insn "mulhisi3_i"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 0 "arith_reg_operand" "r"))
+ (sign_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" "r"))))]
+ "TARGET_SH1"
+ "muls.w %1,%0"
+ [(set_attr "type" "smpy")])
+
+(define_expand "mulhisi3"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (sign_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" ""))
+ (sign_extend:SI
+ (match_operand:HI 2 "arith_reg_operand" ""))))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACL_REG))]
+ "TARGET_SH1"
+{
+ rtx insn, macl;
+
+ macl = gen_rtx_REG (SImode, MACL_REG);
+ start_sequence ();
+ emit_insn (gen_mulhisi3_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* expand_binop can't find a suitable code in umul_widen_optab to
+ make a REG_EQUAL note from, so make one here.
+ See also smulsi3_highpart.
+ ??? Alternatively, we could put this at the calling site of expand_binop,
+ i.e. expand_expr. */
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn)));
+
+ DONE;
+})
+
+(define_expand "umulhisi3"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (zero_extend:SI
+ (match_operand:HI 1 "arith_reg_operand" ""))
+ (zero_extend:SI
+ (match_operand:HI 2 "arith_reg_operand" ""))))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACL_REG))]
+ "TARGET_SH1"
+{
+ rtx insn, macl;
+
+ macl = gen_rtx_REG (SImode, MACL_REG);
+ start_sequence ();
+ emit_insn (gen_umulhisi3_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* expand_binop can't find a suitable code in umul_widen_optab to
+ make a REG_EQUAL note from, so make one here.
+ See also smulsi3_highpart.
+ ??? Alternatively, we could put this at the calling site of expand_binop,
+ i.e. expand_expr. */
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], macl, SET_SRC (single_set (insn)));
+
+ DONE;
+})
+
+;; mulsi3 on the SH2 can be done in one instruction; on the SH1 we generate
+;; a call to a routine which clobbers known registers.
+(define_insn ""
+ [(set (match_operand:SI 1 "register_operand" "=z")
+ (mult:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "mulsi3_call"
+ [(set (reg:SI R4_REG) (match_operand:SI 1 "general_operand" ""))
+ (set (reg:SI R5_REG) (match_operand:SI 2 "general_operand" ""))
+ (parallel[(set (match_operand:SI 0 "register_operand" "")
+ (mult:SI (reg:SI R4_REG)
+ (reg:SI R5_REG)))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R3_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 3 "register_operand" ""))])]
+ "TARGET_SH1"
+ "")
+
+(define_insn "mul_r"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (mult:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "arith_reg_operand" "z")))]
+ "TARGET_SH2A"
+ "mulr %2,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_insn "mul_l"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH2"
+ "mul.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "mulsi3"
+ [(set (reg:SI MACL_REG)
+ (mult:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACL_REG))]
+ "TARGET_SH1"
+{
+ if (!TARGET_SH2)
+ {
+ /* The address must be set outside the libcall,
+ since it goes into a pseudo. */
+ rtx sym = function_symbol (NULL, "__mulsi3", SFUNC_STATIC);
+ rtx addr = force_reg (SImode, sym);
+ rtx insns = gen_mulsi3_call (operands[0], operands[1],
+ operands[2], addr);
+ emit_insn (insns);
+ }
+ else
+ {
+ rtx macl = gen_rtx_REG (SImode, MACL_REG);
+
+ emit_insn (gen_mul_l (operands[1], operands[2]));
+ /* consec_sets_giv can only recognize the first insn that sets a
+ giv as the giv insn. So we must tag this also with a REG_EQUAL
+ note. */
+ emit_insn (gen_movsi_i ((operands[0]), macl));
+ }
+ DONE;
+})
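+
+;; As an illustration, a simple
+;;   int mul (int a, int b) { return a * b; }
+;; expands on SH2 and later into mul.l followed by an sts macl,<reg> style
+;; move of MACL into the result, while on SH1 it becomes the __mulsi3
+;; library call emitted through mulsi3_call above.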
+
+(define_insn "mulsidi3_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (set (reg:SI MACL_REG)
+ (mult:SI (match_dup 0)
+ (match_dup 1)))]
+ "TARGET_SH2"
+ "dmuls.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))]
+ "TARGET_SH2 || TARGET_SHMEDIA"
+{
+ if (TARGET_SH2)
+ {
+ emit_insn (gen_mulsidi3_compact (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "mulsidi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r"))
+ (sign_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "muls.l %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn_and_split "mulsidi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx low_dst = gen_lowpart (SImode, operands[0]);
+ rtx high_dst = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_mulsidi3_i (operands[1], operands[2]));
+
+ emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG));
+ emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG));
+ /* We need something to tag the possible REG_EQUAL notes on to. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+})
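+
+;; As an illustration, this is the expansion used for a widening multiply
+;; such as
+;;   long long wmul (int a, int b) { return (long long) a * b; }
+;; dmuls.l leaves the 64-bit product in MACH:MACL, and the two moves in the
+;; split copy it into the high and low words of the DImode destination.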
+
+(define_insn "umulsidi3_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (set (reg:SI MACL_REG)
+ (mult:SI (match_dup 0)
+ (match_dup 1)))]
+ "TARGET_SH2"
+ "dmulu.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" ""))))]
+ "TARGET_SH2 || TARGET_SHMEDIA"
+{
+ if (TARGET_SH2)
+ {
+ emit_insn (gen_umulsidi3_compact (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+})
+
+(define_insn "umulsidi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "%r"))
+ (zero_extend:DI (match_operand:SI 2 "extend_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "mulu.l %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn_and_split "umulsidi3_compact"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "r"))))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx low_dst = gen_lowpart (SImode, operands[0]);
+ rtx high_dst = gen_highpart (SImode, operands[0]);
+
+ emit_insn (gen_umulsidi3_i (operands[1], operands[2]));
+
+ emit_move_insn (low_dst, gen_rtx_REG (SImode, MACL_REG));
+ emit_move_insn (high_dst, gen_rtx_REG (SImode, MACH_REG));
+ /* We need something to tag the possible REG_EQUAL notes on to. */
+ emit_move_insn (operands[0], operands[0]);
+ DONE;
+})
+
+(define_insn "smulsi3_highpart_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "dmuls.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "smulsi3_highpart"
+ [(parallel
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (sign_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))])
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACH_REG))]
+ "TARGET_SH2"
+{
+ rtx insn, mach;
+
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ start_sequence ();
+ emit_insn (gen_smulsi3_highpart_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* expand_binop can't find a suitable code in mul_highpart_optab to
+ make a REG_EQUAL note from, so make one here.
+ See also {,u}mulhisi.
+ ??? Alternatively, we could put this at the calling site of expand_binop,
+ i.e. expand_mult_highpart. */
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn)));
+
+ DONE;
+})
+
+(define_insn "umulsi3_highpart_i"
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" "r")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SH2"
+ "dmulu.l %1,%0"
+ [(set_attr "type" "dmpy")])
+
+(define_expand "umulsi3_highpart"
+ [(parallel
+ [(set (reg:SI MACH_REG)
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "arith_reg_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "arith_reg_operand" "")))
+ (const_int 32))))
+ (clobber (reg:SI MACL_REG))])
+ (set (match_operand:SI 0 "arith_reg_operand" "")
+ (reg:SI MACH_REG))]
+ "TARGET_SH2"
+{
+ rtx insn, mach;
+
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ start_sequence ();
+ emit_insn (gen_umulsi3_highpart_i (operands[1], operands[2]));
+ insn = get_insns ();
+ end_sequence ();
+ /* Use emit_libcall_block for loop invariant code motion and to make
+ a REG_EQUAL note. */
+ emit_libcall_block (insn, operands[0], mach, SET_SRC (single_set (insn)));
+
+ DONE;
+})
+
+(define_insn_and_split "muldi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (mult:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (match_scratch:DI 4 "=r"))]
+ "TARGET_SHMEDIA"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx op3_v2si, op2_v2si;
+
+ op3_v2si = operands[3];
+ if (GET_CODE (op3_v2si) == SIGN_EXTEND)
+ {
+ op3_v2si = XEXP (op3_v2si, 0);
+ op3_v2si = simplify_gen_subreg (DImode, op3_v2si, GET_MODE (op3_v2si), 0);
+ }
+ op3_v2si = simplify_gen_subreg (V2SImode, op3_v2si, DImode, 0);
+ op2_v2si = operands[2];
+ if (GET_CODE (op2_v2si) == SIGN_EXTEND)
+ {
+ op2_v2si = XEXP (op2_v2si, 0);
+ op2_v2si = simplify_gen_subreg (DImode, op2_v2si, GET_MODE (op2_v2si), 0);
+ }
+ op2_v2si = simplify_gen_subreg (V2SImode, op2_v2si, DImode, 0);
+ emit_insn (gen_rotldi3 (operands[3], operands[1], GEN_INT (32)));
+ emit_insn (gen_mulv2si3 (op3_v2si, op3_v2si, op2_v2si));
+ emit_insn (gen_umulsidi3_media (operands[4],
+ sh_gen_truncate (SImode, operands[1], 0),
+ sh_gen_truncate (SImode, operands[2], 0)));
+ emit_insn (gen_anddi3 (operands[0], operands[3], GEN_INT (0xffffffff00000000LL)));
+ emit_insn (gen_ashldi3_media (operands[3], operands[3], GEN_INT (32)));
+ emit_insn (gen_adddi3 (operands[0], operands[3], operands[0]));
+ emit_insn (gen_adddi3 (operands[0], operands[4], operands[0]));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Logical operations
+;; -------------------------------------------------------------------------
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (and:SI (match_operand:SI 1 "logical_reg_operand" "")
+ (match_operand:SI 2 "logical_and_operand" "")))]
+ ""
+{
+ /* If it is possible to turn the and insn into a zero extension
+ already, redundant zero extensions will be folded, which results
+ in better code.
+ Ideally the splitter of *andsi_compact would be enough if redundant
+ zero extensions were detected after the combine pass, but at the
+ moment that does not happen. */
+ if (TARGET_SH1)
+ {
+ if (satisfies_constraint_Jmb (operands[2]))
+ {
+ emit_insn (gen_zero_extendqisi2 (operands[0],
+ gen_lowpart (QImode, operands[1])));
+ DONE;
+ }
+ else if (satisfies_constraint_Jmw (operands[2]))
+ {
+ emit_insn (gen_zero_extendhisi2 (operands[0],
+ gen_lowpart (HImode, operands[1])));
+ DONE;
+ }
+ }
+})
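+
+;; As an illustration, with the Jmb / Jmw masks this turns e.g.
+;;   unsigned int lo8  (unsigned int x) { return x & 0xff; }   // extu.b
+;;   unsigned int lo16 (unsigned int x) { return x & 0xffff; } // extu.w
+;; into zero extensions instead of and instructions, so that any adjacent
+;; redundant zero extensions can be folded.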
+
+(define_insn_and_split "*andsi_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r,z,r")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "%r,r,0,0")
+ (match_operand:SI 2 "logical_and_operand" "Jmb,Jmw,K08,r")))]
+ "TARGET_SH1"
+ "@
+ extu.b %1,%0
+ extu.w %1,%0
+ and %2,%0
+ and %2,%0"
+ "&& 1"
+ [(set (match_dup 0) (zero_extend:SI (match_dup 1)))]
+{
+ if (satisfies_constraint_Jmb (operands[2]))
+ operands[1] = gen_lowpart (QImode, operands[1]);
+ else if (satisfies_constraint_Jmw (operands[2]))
+ operands[1] = gen_lowpart (HImode, operands[1]);
+ else
+ FAIL;
+}
+ [(set_attr "type" "arith")])
+
+(define_insn "*andsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (and:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ and %1, %2, %0
+ andi %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*andsi3_bclr"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "Psz")))]
+ "TARGET_SH2A && satisfies_constraint_Psz (operands[2])"
+ "bclr %W2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "anddi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r,r")
+ (and:DI (match_operand:DI 1 "arith_reg_operand" "%r,r,r")
+ (match_operand:DI 2 "and_operand" "r,I10,J16")))]
+ "TARGET_SHMEDIA"
+ "@
+ and %1, %2, %0
+ andi %1, %2, %0
+ #"
+ "reload_completed
+ && ! logical_operand (operands[2], DImode)"
+ [(const_int 0)]
+{
+ if ((unsigned)INTVAL (operands[2]) == (unsigned) 0xffffffff)
+ emit_insn (gen_mshflo_l_di (operands[0], operands[1], CONST0_RTX (DImode)));
+ else
+ emit_insn (gen_mshfhi_l_di (operands[0], CONST0_RTX (DImode), operands[1]));
+ DONE;
+}
+ [(set_attr "type" "arith_media")])
+
+(define_insn "andcsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (not:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "andc %1,%2,%0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "andcdi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (and:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (not:DI (match_operand:DI 2 "arith_reg_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "andc %1,%2,%0"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "iorsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ior:SI (match_operand:SI 1 "logical_reg_operand" "")
+ (match_operand:SI 2 "logical_operand" "")))]
+ ""
+ "")
+
+(define_insn "*iorsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,z")
+ (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "r,K08")))]
+ "TARGET_SH1
+ && !(TARGET_SH2A && satisfies_constraint_Pso (operands[2]))"
+ "or %2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*iorsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ior:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ or %1, %2, %0
+ ori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*iorsi3_bset"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (match_operand:SI 1 "arith_reg_operand" "%0")
+ (match_operand:SI 2 "const_int_operand" "Pso")))]
+ "TARGET_SH2A && satisfies_constraint_Pso (operands[2])"
+ "bset %V2,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "iordi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (ior:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "logical_operand" "r,I10")))]
+ "TARGET_SHMEDIA"
+ "@
+ or %1, %2, %0
+ ori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn_and_split "*logical_sidi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:DI (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")])))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ operands[3]
+ = gen_rtx_fmt_ee (GET_CODE (operands[3]), DImode,
+ simplify_gen_subreg (DImode, operands[1], SImode, 0),
+ simplify_gen_subreg (DImode, operands[2], SImode, 0));
+})
+
+(define_insn_and_split "*logical_sidisi3"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (truncate:SI (sign_extend:DI
+ (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")]))))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 3))])
+
+(define_insn_and_split "*logical_sidi3_2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (sign_extend:DI (truncate:SI (sign_extend:DI
+ (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "logical_operand" "r,I10")])))))]
+ "TARGET_SHMEDIA"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (sign_extend:DI (match_dup 3)))])
+
+(define_expand "xorsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (xor:SI (match_operand:SI 1 "logical_reg_operand" "")
+ (match_operand:SI 2 "xor_operand" "")))]
+ ""
+ "")
+
+(define_insn "*xorsi3_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
+ (xor:SI (match_operand:SI 1 "arith_reg_operand" "%0,0")
+ (match_operand:SI 2 "logical_operand" "K08,r")))]
+ "TARGET_SH1"
+ "xor %2,%0"
+ [(set_attr "type" "arith")])
+
+;; The *logical_op_t pattern helps combine eliminate sign/zero extensions
+;; of results where one of the inputs is a T bit store. Notice that this
+;; pattern must not match during reload. If reload picks this pattern it
+;; will be impossible to split it afterwards.
+(define_insn_and_split "*logical_op_t"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (match_operator:SI 3 "logical_operator"
+ [(match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "t_reg_operand")]))]
+ "TARGET_SH1 && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 4) (reg:SI T_REG))
+ (set (match_dup 0) (match_dup 3))]
+{
+ operands[4] = gen_reg_rtx (SImode);
+ operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
+ operands[1], operands[4]);
+})
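+
+;; As an illustration, a source pattern along the lines of
+;;   int f (int a, int b, int c) { return c | (a == b); }
+;; benefits here: the comparison result lives in the T bit, and copying it
+;; into a fresh pseudo lets combine drop the separate zero extension of the
+;; comparison result that would otherwise remain.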
+
+(define_insn "*xorsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (xor:SI (match_operand:SI 1 "logical_reg_operand" "%r,r")
+ (match_operand:SI 2 "xor_operand" "r,I06")))]
+ "TARGET_SHMEDIA"
+ "@
+ xor %1, %2, %0
+ xori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "xordi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (xor:DI (match_operand:DI 1 "arith_reg_operand" "%r,r")
+ (match_operand:DI 2 "xor_operand" "r,I06")))]
+ "TARGET_SHMEDIA"
+ "@
+ xor %1, %2, %0
+ xori %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+;; Combiner bridge pattern for 2 * sign extend -> logical op -> truncate.
+;; It converts 2 * sign extend -> logical op into logical op -> sign extend.
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (sign_extend:DI (match_operator 4 "binary_logical_operator"
+ [(match_operand 1 "any_register_operand" "")
+ (match_operand 2 "any_register_operand" "")])))]
+ "TARGET_SHMEDIA"
+ [(set (match_dup 5) (match_dup 4))
+ (set (match_dup 0) (sign_extend:DI (match_dup 5)))]
+{
+ enum machine_mode inmode = GET_MODE (operands[1]);
+ int offset = 0;
+
+ if (GET_CODE (operands[0]) == SUBREG)
+ {
+ offset = SUBREG_BYTE (operands[0]);
+ operands[0] = SUBREG_REG (operands[0]);
+ }
+ gcc_assert (REG_P (operands[0]));
+ if (TARGET_BIG_ENDIAN)
+ offset += 8 - GET_MODE_SIZE (inmode);
+ operands[5] = gen_rtx_SUBREG (inmode, operands[0], offset);
+})
+
+;; -------------------------------------------------------------------------
+;; Shifts and rotates
+;; -------------------------------------------------------------------------
+
+(define_expand "rotldi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (rotate:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "mextr_bit_offset" "")))]
+ "TARGET_SHMEDIA"
+{
+ if (! mextr_bit_offset (operands[2], HImode))
+ FAIL;
+})
+
+(define_insn "rotldi3_mextr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotate:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+{
+ static char templ[16];
+ sprintf (templ, "mextr%d %%1,%%1,%%0",
+ 8 - (int) (INTVAL (operands[2]) >> 3));
+ return templ;
+}
+ [(set_attr "type" "arith_media")])
+
+(define_expand "rotrdi3"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:HI 2 "mextr_bit_offset" "")))]
+ "TARGET_SHMEDIA"
+{
+ if (! mextr_bit_offset (operands[2], HImode))
+ FAIL;
+})
+
+(define_insn "rotrdi3_mextr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (rotatert:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:HI 2 "mextr_bit_offset" "i")))]
+ "TARGET_SHMEDIA"
+{
+ static char templ[16];
+ sprintf (templ, "mextr%d %%1,%%1,%%0", (int) INTVAL (operands[2]) >> 3);
+ return templ;
+}
+ [(set_attr "type" "arith_media")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ior:DI (zero_extend:DI (mem:QI (match_operand 1
+ "ua_address_operand" "")))
+ (ashift:DI (match_operand:DI 2 "arith_reg_operand" "")
+ (const_int 8))))
+ (clobber (match_operand:DI 3 "register_operand" ""))]
+ "TARGET_SHMEDIA"
+ [(match_dup 4) (match_dup 5)]
+{
+ operands[4] = ((TARGET_LITTLE_ENDIAN ? gen_ldhi_q : gen_ldlo_q)
+ (operands[3], operands[1]));
+ operands[5] = gen_mextr_rl (operands[0], operands[3], operands[2],
+ GEN_INT (56), GEN_INT (8));
+})
+
+(define_expand "rotrsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (rotatert:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1"
+{
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+ if (ival == 1)
+ {
+ emit_insn (gen_rotrsi3_1 (operands[0], operands[1]));
+ DONE;
+ }
+
+ FAIL;
+})
+
+(define_insn "rotrsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotatert:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (and:SI (match_dup 1) (const_int 1)))]
+ "TARGET_SH1"
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of rotr for combine.
+(define_insn "*rotrsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotatert:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (lshiftrt:SI (match_dup 1) (const_int 31)))]
+ "TARGET_SH1"
+ "rotl %0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of rotl for combine.
+(define_insn "*rotlsi3_1"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "rotl %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_31"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "rotr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotlsi3_16"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ "TARGET_SH1"
+ "swap.w %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "rotlsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (rotate:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_SH1"
+{
+ static const char rot_tab[] = {
+ 000, 000, 000, 000, 000, 000, 010, 001,
+ 001, 001, 011, 013, 003, 003, 003, 003,
+ 003, 003, 003, 003, 003, 013, 012, 002,
+ 002, 002, 010, 000, 000, 000, 000, 000,
+ };
+
+ int count = INTVAL (operands[2]);
+ int choice = rot_tab[count];
+ if (choice & 010 && SH_DYNAMIC_SHIFT_COST <= 1)
+ FAIL;
+ choice &= 7;
+ switch (choice)
+ {
+ case 0:
+ emit_move_insn (operands[0], operands[1]);
+ count -= (count & 16) * 2;
+ break;
+ case 3:
+ emit_insn (gen_rotlsi3_16 (operands[0], operands[1]));
+ count -= 16;
+ break;
+ case 1:
+ case 2:
+ {
+ rtx parts[2];
+ parts[0] = gen_reg_rtx (SImode);
+ parts[1] = gen_reg_rtx (SImode);
+ emit_insn (gen_rotlsi3_16 (parts[2-choice], operands[1]));
+ emit_move_insn (parts[choice-1], operands[1]);
+ emit_insn (gen_ashlsi3 (parts[0], parts[0], GEN_INT (8)));
+ emit_insn (gen_lshrsi3 (parts[1], parts[1], GEN_INT (8)));
+ emit_insn (gen_iorsi3 (operands[0], parts[0], parts[1]));
+ count = (count & ~16) - 8;
+ }
+ }
+
+ for (; count > 0; count--)
+ emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
+ for (; count < 0; count++)
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+
+ DONE;
+})
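+
+;; As an illustration of the rot_tab encoding, a rotate-left by 17 such as
+;;   unsigned int rot17 (unsigned int x) { return (x << 17) | (x >> 15); }
+;; takes the choice-3 path: one swap.w (a rotate by 16) followed by a single
+;; rotl for the remaining bit.  The choice-1/2 paths instead combine a swap.w
+;; with 8-bit shifts of two copies of the value.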
+
+(define_insn "rotlhi3_8"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (rotate:HI (match_operand:HI 1 "arith_reg_operand" "r")
+ (const_int 8)))]
+ "TARGET_SH1"
+ "swap.b %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_expand "rotlhi3"
+ [(set (match_operand:HI 0 "arith_reg_operand")
+ (rotate:HI (match_operand:HI 1 "arith_reg_operand")
+ (match_operand:HI 2 "const_int_operand")))]
+ "TARGET_SH1"
+{
+ if (INTVAL (operands[2]) != 8)
+ FAIL;
+})
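+
+;; As an illustration, an HImode rotate by 8 is simply a 16-bit byte swap,
+;; e.g. the value computed by
+;;   unsigned short swap16 (unsigned short x) { return (x << 8) | (x >> 8); }
+;; and maps onto the single swap.b instruction above.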
+
+;; The rotcr and rotcl insns are used primarily in DImode shifts by one.
+;; They can also be used to implement things like
+;; bool t = a == b;
+;; int x0 = (y >> 1) | (t << 31); // rotcr
+;; int x1 = (y << 1) | t; // rotcl
+(define_insn "rotcr"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1))
+ (ashift:SI (match_operand:SI 2 "t_reg_operand")
+ (const_int 31))))
+ (set (reg:SI T_REG)
+ (and:SI (match_dup 1) (const_int 1)))]
+ "TARGET_SH1"
+ "rotcr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "rotcl"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1))
+ (match_operand:SI 2 "t_reg_operand")))
+ (set (reg:SI T_REG)
+ (lshiftrt:SI (match_dup 1) (const_int 31)))]
+ "TARGET_SH1"
+ "rotcl %0"
+ [(set_attr "type" "arith")])
+
+;; Simplified rotcr version for combine, which allows arbitrary shift
+;; amounts for the reg.  If the shift amount is '1', rotcr can be used
+;; directly. Otherwise we have to insert a shift in between.
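+;; As an illustration, combine can present this pattern for code such as
+;;   unsigned int f (unsigned int y, int a, int b)
+;;   { return (y >> 5) | ((unsigned int) (a == b) << 31); }
+;; in which case the split below first shifts y right by 4 and then uses a
+;; single rotcr to rotate the comparison result in from the T bit.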
+(define_insn_and_split "*rotcr"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (ashift:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand")
+ (const_int 31))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ if (INTVAL (operands[2]) > 1)
+ {
+ const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1);
+ rtx prev_set_t_insn = NULL_RTX;
+ rtx tmp_t_reg = NULL_RTX;
+
+ /* If we're going to emit a shift sequence that clobbers the T_REG,
+ try to find the previous insn that sets the T_REG and emit the
+ shift insn before that insn, to remove the T_REG dependency.
+ If the insn that sets the T_REG cannot be found, store the T_REG
+ in a temporary reg and restore it after the shift. */
+ if (sh_lshrsi_clobbers_t_reg_p (shift_count)
+ && ! sh_dynamicalize_shift_p (shift_count))
+ {
+ prev_set_t_insn = prev_nonnote_insn_bb (curr_insn);
+
+ /* Skip the nott insn, which was probably inserted by the splitter
+ of *rotcr_neg_t. Don't use one of the recog functions
+ here during insn splitting, since that causes problems in later
+ passes. */
+ if (prev_set_t_insn != NULL_RTX)
+ {
+ rtx pat = PATTERN (prev_set_t_insn);
+ if (GET_CODE (pat) == SET
+ && t_reg_operand (XEXP (pat, 0), SImode)
+ && negt_reg_operand (XEXP (pat, 1), SImode))
+ prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn);
+ }
+
+ if (! (prev_set_t_insn != NULL_RTX
+ && reg_set_p (get_t_reg_rtx (), prev_set_t_insn)
+ && ! reg_referenced_p (get_t_reg_rtx (),
+ PATTERN (prev_set_t_insn))))
+ {
+ prev_set_t_insn = NULL_RTX;
+ tmp_t_reg = gen_reg_rtx (SImode);
+ emit_insn (gen_move_insn (tmp_t_reg, get_t_reg_rtx ()));
+ }
+ }
+
+ rtx shift_result = gen_reg_rtx (SImode);
+ rtx shift_insn = gen_lshrsi3 (shift_result, operands[1], shift_count);
+ operands[1] = shift_result;
+
+ /* Emit the shift insn before the insn that sets T_REG, if possible. */
+ if (prev_set_t_insn != NULL_RTX)
+ emit_insn_before (shift_insn, prev_set_t_insn);
+ else
+ emit_insn (shift_insn);
+
+ /* Restore T_REG if it has been saved before. */
+ if (tmp_t_reg != NULL_RTX)
+ emit_insn (gen_cmpgtsi_t (tmp_t_reg, const0_rtx));
+ }
+
+ /* For the rotcr insn to work, operands[3] must be in T_REG.
+ If it is not, we can get it there by shifting it right one bit.
+ In this case T_REG is not an input for this insn, so we don't have to
+ pay attention to where to insert the shlr insn. */
+ if (! t_reg_operand (operands[3], SImode))
+ {
+ /* We don't care about the shifted result here, only the T_REG. */
+ emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[3]));
+ operands[3] = get_t_reg_rtx ();
+ }
+
+ emit_insn (gen_rotcr (operands[0], operands[1], operands[3]));
+ DONE;
+})
+
+;; If combine tries the same as above but with swapped operands, split
+;; it so that it will try the pattern above.
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand")
+ (const_int 31))
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand")
+ (match_operand:SI 3 "const_int_operand"))))]
+ "TARGET_SH1 && can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (lshiftrt:SI (match_dup 2) (match_dup 3))
+ (ashift:SI (match_dup 1) (const_int 31))))
+ (clobber (reg:SI T_REG))])])
+
+;; Basically the same as the rotcr pattern above, but for rotcl.
+;; FIXME: Fold copy pasted split code for rotcr and rotcl.
+(define_insn_and_split "*rotcl"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (and:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand")
+ (const_int 1))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ gcc_assert (INTVAL (operands[2]) > 0);
+
+ if (INTVAL (operands[2]) > 1)
+ {
+ const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1);
+ rtx prev_set_t_insn = NULL_RTX;
+ rtx tmp_t_reg = NULL_RTX;
+
+ /* If we're going to emit a shift sequence that clobbers the T_REG,
+ try to find the previous insn that sets the T_REG and emit the
+ shift insn before that insn, to remove the T_REG dependency.
+ If the insn that sets the T_REG cannot be found, store the T_REG
+ in a temporary reg and restore it after the shift. */
+ if (sh_ashlsi_clobbers_t_reg_p (shift_count)
+ && ! sh_dynamicalize_shift_p (shift_count))
+ {
+ prev_set_t_insn = prev_nonnote_insn_bb (curr_insn);
+
+ /* Skip the nott insn, which was probably inserted by the splitter
+ of *rotcl_neg_t. Don't use one of the recog functions
+ here during insn splitting, since that causes problems in later
+ passes. */
+ if (prev_set_t_insn != NULL_RTX)
+ {
+ rtx pat = PATTERN (prev_set_t_insn);
+ if (GET_CODE (pat) == SET
+ && t_reg_operand (XEXP (pat, 0), SImode)
+ && negt_reg_operand (XEXP (pat, 1), SImode))
+ prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn);
+ }
+
+ if (! (prev_set_t_insn != NULL_RTX
+ && reg_set_p (get_t_reg_rtx (), prev_set_t_insn)
+ && ! reg_referenced_p (get_t_reg_rtx (),
+ PATTERN (prev_set_t_insn))))
+ {
+ prev_set_t_insn = NULL_RTX;
+ tmp_t_reg = gen_reg_rtx (SImode);
+ emit_insn (gen_move_insn (tmp_t_reg, get_t_reg_rtx ()));
+ }
+ }
+
+ rtx shift_result = gen_reg_rtx (SImode);
+ rtx shift_insn = gen_ashlsi3 (shift_result, operands[1], shift_count);
+ operands[1] = shift_result;
+
+ /* Emit the shift insn before the insn that sets T_REG, if possible. */
+ if (prev_set_t_insn != NULL_RTX)
+ emit_insn_before (shift_insn, prev_set_t_insn);
+ else
+ emit_insn (shift_insn);
+
+ /* Restore T_REG if it has been saved before. */
+ if (tmp_t_reg != NULL_RTX)
+ emit_insn (gen_cmpgtsi_t (tmp_t_reg, const0_rtx));
+ }
+
+ /* For the rotcl insn to work, operands[3] must be in T_REG.
+ If it is not, we can get it there by shifting it right one bit.
+ In this case T_REG is not an input for this insn, so we don't have to
+ pay attention to where to insert the shlr insn. */
+ if (! t_reg_operand (operands[3], SImode))
+ {
+ /* We don't care about the shifted result here, only the T_REG. */
+ emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[3]));
+ operands[3] = get_t_reg_rtx ();
+ }
+
+ emit_insn (gen_rotcl (operands[0], operands[1], operands[3]));
+ DONE;
+})
+
+;; rotcl combine pattern variations
+(define_insn_and_split "*rotcl"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (match_operand:SI 3 "t_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 1) (match_dup 2))
+ (and:SI (match_dup 3) (const_int 1))))
+ (clobber (reg:SI T_REG))])])
+
+(define_insn_and_split "*rotcl"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (and:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand")
+ (const_int 1))
+ (ashift:SI (match_operand:SI 2 "arith_reg_operand")
+ (match_operand:SI 3 "const_int_operand"))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 2) (match_dup 3))
+ (and:SI (match_dup 1) (const_int 1))))
+ (clobber (reg:SI T_REG))])])
+
+(define_insn_and_split "*rotcl"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand")
+ (const_int 31))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 1) (match_dup 2))
+ (and:SI (reg:SI T_REG) (const_int 1))))
+ (clobber (reg:SI T_REG))])]
+{
+ /* We don't care about the result of the left shift, only the T_REG. */
+ emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3]));
+})
+
+(define_insn_and_split "*rotcl"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand")
+ (const_int 31))
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 1) (match_dup 2))
+ (and:SI (reg:SI T_REG) (const_int 1))))
+ (clobber (reg:SI T_REG))])]
+{
+ /* We don't care about the result of the left shift, only the T_REG. */
+ emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3]));
+})
+
+;; rotcr combine bridge pattern which will make combine try out more
+;; complex patterns.
+(define_insn_and_split "*rotcr"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ashift:SI (match_operand:SI 1 "t_reg_operand") (const_int 31)))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (match_dup 0)
+ (ior:SI (lshiftrt:SI (match_dup 0) (const_int 1))
+ (ashift:SI (match_dup 1) (const_int 31))))
+ (set (reg:SI T_REG)
+ (and:SI (match_dup 0) (const_int 1)))])])
+
+(define_insn_and_split "*rotcr"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand")
+ (const_int -2147483648)) ;; 0xffffffff80000000
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand")
+ (const_int 1))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_shll (tmp, operands[1]));
+ emit_insn (gen_rotcr (operands[0], operands[2], get_t_reg_rtx ()));
+ DONE;
+})
+
+;; rotcr combine patterns for rotating in the negated T_REG value.
+(define_insn_and_split "*rotcr_neg_t"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (match_operand:SI 1 "negt_reg_shl31_operand")
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand")
+ (match_operand:SI 3 "const_int_operand"))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (lshiftrt:SI (match_dup 2) (match_dup 3))
+ (ashift:SI (reg:SI T_REG) (const_int 31))))
+ (clobber (reg:SI T_REG))])]
+{
+ emit_insn (gen_nott (get_t_reg_rtx ()));
+})
+
+(define_insn_and_split "*rotcr_neg_t"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "const_int_operand"))
+ (match_operand:SI 3 "negt_reg_shl31_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (ior:SI (lshiftrt:SI (match_dup 1) (match_dup 2))
+ (ashift:SI (reg:SI T_REG) (const_int 31))))
+ (clobber (reg:SI T_REG))])]
+{
+ emit_insn (gen_nott (get_t_reg_rtx ()));
+})
+
+;; rotcl combine patterns for rotating in the negated T_REG value.
+;; For some strange reason these have to be specified as splits which combine
+;; will pick up. If they are specified as insn_and_split like the
+;; *rotcr_neg_t patterns above, combine would recognize them successfully
+;; but not emit them on non-SH2A targets.
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (match_operand:SI 1 "negt_reg_operand")
+ (ashift:SI (match_operand:SI 2 "arith_reg_operand")
+ (match_operand:SI 3 "const_int_operand"))))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))
+ (parallel [(set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 2) (match_dup 3))
+ (and:SI (reg:SI T_REG) (const_int 1))))
+ (clobber (reg:SI T_REG))])])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (ior:SI (ashift:SI (match_operand:SI 2 "arith_reg_operand")
+ (match_operand:SI 3 "const_int_operand"))
+ (match_operand:SI 1 "negt_reg_operand")))]
+ "TARGET_SH1"
+ [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1)))
+ (parallel [(set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 2) (match_dup 3))
+ (and:SI (reg:SI T_REG) (const_int 1))))
+ (clobber (reg:SI T_REG))])])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; SImode shift left
+
+(define_expand "ashlsi3"
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "shift_count_operand" "")))]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashlsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (TARGET_DYNSHIFT
+ && CONST_INT_P (operands[2]) && sh_dynamicalize_shift_p (operands[2]))
+ operands[2] = force_reg (SImode, operands[2]);
+
+ /* If the ashlsi3_* insn is going to clobber the T_REG it must be
+ expanded here. */
+ if (CONST_INT_P (operands[2])
+ && sh_ashlsi_clobbers_t_reg_p (operands[2])
+ && ! sh_dynamicalize_shift_p (operands[2]))
+ {
+ emit_insn (gen_ashlsi3_n_clobbers_t (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ /* Expand a library call for the dynamic shift. */
+ if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+ rtx funcaddr = gen_reg_rtx (Pmode);
+ function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
+ emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
+
+ DONE;
+ }
+})
+
+(define_insn "ashlsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0,0")
+ (match_operand:SI 2 "p27_shift_count_operand" "M,P27")))]
+ "TARGET_SH1"
+ "@
+ add %0,%0
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "ashlsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "shift_count_operand" "r")))]
+ "TARGET_DYNSHIFT"
+ "shld %2,%0"
+ "&& CONST_INT_P (operands[2]) && ! sh_dynamicalize_shift_p (operands[2])
+ && ! sh_ashlsi_clobbers_t_reg_p (operands[2])"
+ [(const_int 0)]
+{
+ if (satisfies_constraint_P27 (operands[2]))
+ {
+ emit_insn (gen_ashlsi3_k (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (! satisfies_constraint_P27 (operands[2]))
+ {
+ /* This must happen before reload, otherwise the constant will be moved
+ into a register due to the "r" constraint, after which this split
+ cannot be done anymore.
+ Unfortunately the move insn will not always be eliminated.
+ Also, here we must not create a shift sequence that clobbers the
+ T_REG. */
+ emit_move_insn (operands[0], operands[1]);
+ gen_shifty_op (ASHIFT, operands);
+ DONE;
+ }
+
+ FAIL;
+}
+ [(set_attr "type" "dyn_shift")])
+
+;; If dynamic shifts are not available use a library function.
+;; By specifying the pattern we reduce the number of call clobbered regs.
+;; In order to make combine understand the truncation of the shift amount
+;; operand we have to allow it to use pseudo regs for the shift operands.
+(define_insn "ashlsi3_d_call"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ (ashift:SI (reg:SI R4_REG)
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (const_int 31))))
+ (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH1 && !TARGET_DYNSHIFT"
+ "jsr @%2%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn_and_split "ashlsi3_n"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "not_p27_shift_count_operand" "")))]
+ "TARGET_SH1 && ! sh_ashlsi_clobbers_t_reg_p (operands[2])"
+ "#"
+ "&& (reload_completed
+ || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
+ [(const_int 0)]
+{
+ if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ())
+ {
+ /* If this pattern was picked and dynamic shifts are supported, switch
+ to dynamic shift pattern before reload. */
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2]));
+ }
+ else
+ gen_shifty_op (ASHIFT, operands);
+
+ DONE;
+})
+
+(define_insn_and_split "ashlsi3_n_clobbers_t"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "not_p27_shift_count_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && sh_ashlsi_clobbers_t_reg_p (operands[2])"
+ "#"
+ "&& (reload_completed || INTVAL (operands[2]) == 31
+ || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
+ [(const_int 0)]
+{
+ if (INTVAL (operands[2]) == 31)
+ {
+      /* If the shift amount is 31 we split into a different sequence before
+	 reload so that register allocation gets a chance to allocate R0 for
+	 the sequence.  If it fails to do so (due to pressure on R0), the
+	 sequence takes one more insn for the and.  */
+ emit_insn (gen_andsi3 (operands[0], operands[1], const1_rtx));
+ emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
+ }
+ else if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ())
+ {
+ /* If this pattern was picked and dynamic shifts are supported, switch
+ to dynamic shift pattern before reload. */
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_ashlsi3_d (operands[0], operands[1], operands[2]));
+ }
+ else
+ gen_shifty_op (ASHIFT, operands);
+
+ DONE;
+})
+
+(define_insn "shll"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1)))
+ (set (reg:SI T_REG)
+ (lt:SI (match_dup 1) (const_int 0)))]
+ "TARGET_SH1"
+ "shll %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*ashlsi_c_void"
+ [(set (reg:SI T_REG)
+ (lt:SI (match_operand:SI 0 "arith_reg_operand" "r") (const_int 0)))
+ (clobber (match_scratch:SI 1 "=0"))]
+ "TARGET_SH1 && cse_not_expected"
+ "shll %0"
+ [(set_attr "type" "arith")])
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "") (const_int 0))
+ (set (reg:SI T_REG)
+ (gt:SI (match_dup 0) (match_operand:SI 1 "arith_reg_operand" "")))]
+ "TARGET_SH1
+ && peep2_reg_dead_p (2, operands[0])
+ && peep2_reg_dead_p (2, operands[1])"
+ [(const_int 0)]
+{
+ emit_insn (gen_shll (operands[1], operands[1]));
+ DONE;
+})
+
+(define_insn "ashlsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ashift:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+ (match_operand:SI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlld.l %1, %2, %0
+ shlli.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; HImode shift left
+
+(define_expand "ashlhi3"
+ [(parallel [(set (match_operand:HI 0 "arith_reg_operand" "")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH1"
+{
+ if (!CONST_INT_P (operands[2]))
+ FAIL;
+ /* It may be possible to call gen_ashlhi3 directly with more generic
+ operands. Make sure operands[1] is a HImode register here. */
+ if (!arith_reg_operand (operands[1], HImode))
+ operands[1] = copy_to_mode_reg (HImode, operands[1]);
+})
+
+(define_insn "ashlhi3_k"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r,r")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0,0")
+ (match_operand:HI 2 "const_int_operand" "M,P27")))]
+ "TARGET_SH1 && satisfies_constraint_P27 (operands[2])"
+ "@
+ add %0,%0
+ shll%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "*ashlhi3_n"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (ashift:HI (match_operand:HI 1 "arith_reg_operand" "0")
+ (match_operand:HI 2 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& reload_completed"
+ [(use (reg:SI R0_REG))]
+{
+ gen_shifty_hi_op (ASHIFT, operands);
+ DONE;
+})
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; DImode shift left
+
+(define_expand "ashldi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashldi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1)
+ {
+ emit_insn (gen_ashldi3_k (operands[0], operands[1]));
+ DONE;
+ }
+ else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 32)
+ {
+ emit_insn (gen_ashldi3_std (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+;; Expander for DImode shift left with SImode operations.
+(define_expand "ashldi3_std"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SH1 && INTVAL (operands[2]) < 32"
+{
+ rtx low_src = gen_lowpart (SImode, operands[1]);
+ rtx high_src = gen_highpart (SImode, operands[1]);
+ rtx dst = gen_reg_rtx (DImode);
+ rtx low_dst = gen_lowpart (SImode, dst);
+ rtx high_dst = gen_highpart (SImode, dst);
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_lshrsi3 (tmp0, low_src, GEN_INT (32 - INTVAL (operands[2]))));
+ emit_insn (gen_ashlsi3 (low_dst, low_src, operands[2]));
+ emit_insn (gen_ashlsi3 (tmp1, high_src, operands[2]));
+ emit_insn (gen_iorsi3 (high_dst, tmp0, tmp1));
+ emit_move_insn (operands[0], dst);
+ DONE;
+})
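
A minimal C sketch of the same decomposition, assuming a constant shift count n with 0 < n < 32; the helper name shl64_by_parts is made up for this illustration.

    #include <stdint.h>

    /* DImode left shift built from SImode operations, mirroring the
       expander above: the bits shifted out of the low word are or-ed into
       the shifted high word.  */
    static uint64_t
    shl64_by_parts (uint32_t lo, uint32_t hi, unsigned n)
    {
      uint32_t new_lo = lo << n;
      uint32_t new_hi = (hi << n) | (lo >> (32 - n));
      return ((uint64_t) new_hi << 32) | new_lo;
    }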
+
+(define_insn_and_split "ashldi3_k"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx high = gen_highpart (SImode, operands[0]);
+ rtx low = gen_lowpart (SImode, operands[0]);
+ emit_insn (gen_shll (low, low));
+ emit_insn (gen_rotcl (high, high, get_t_reg_rtx ()));
+ DONE;
+})
+
+(define_insn "ashldi3_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlld %1, %2, %0
+ shlli %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*ashldisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+ "shlli.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; SImode arithmetic shift right
+;;
+;; We can't do HImode right shifts correctly unless we start out with an
+;; explicit zero / sign extension; doing that would result in worse overall
+;; code, so just let the machine independent code widen the mode.
+;; That's why we don't have ashrhi3_k / lshrhi3_k / lshrhi3_m / lshrhi3 .
+
+(define_expand "ashrsi3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashrsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (expand_ashiftrt (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_insn "shar"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (and:SI (match_dup 1) (const_int 1)))]
+ "TARGET_SH1"
+ "shar %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "ashrsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "M")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && INTVAL (operands[2]) == 1"
+ "shar %0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "ashrsi2_16"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16)))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (rotate:SI (match_dup 1) (const_int 16)))
+ (set (match_dup 0) (sign_extend:SI (match_dup 2)))]
+{
+ operands[2] = gen_lowpart (HImode, operands[0]);
+})
+
+(define_insn_and_split "ashrsi2_31"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 31)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ emit_insn (gen_shll (operands[0], operands[1]));
+ emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
+ DONE;
+})
+
+(define_insn "ashrsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
+ "TARGET_DYNSHIFT"
+ "shad %2,%0"
+ [(set_attr "type" "dyn_shift")])
+
+(define_insn "ashrsi3_n"
+ [(set (reg:SI R4_REG)
+ (ashiftrt:SI (reg:SI R4_REG)
+ (match_operand:SI 0 "const_int_operand" "i")))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))
+ (use (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "ashrsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (ashiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+ (match_operand:SI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shard.l %1, %2, %0
+ shari.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; DImode arithmetic shift right
+
+(define_expand "ashrdi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ashrdi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 1)
+ FAIL;
+})
+
+(define_insn_and_split "ashrdi3_k"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx high = gen_highpart (SImode, operands[0]);
+ rtx low = gen_lowpart (SImode, operands[0]);
+ emit_insn (gen_shar (high, high));
+ emit_insn (gen_rotcr (low, low, get_t_reg_rtx ()));
+ DONE;
+})
+
+(define_insn "ashrdi3_media"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA
+ && (arith_reg_dest (operands[0], DImode)
+ || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) >= 32))"
+ "@
+ shard %1, %2, %0
+ shari %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*ashrdisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+ "shari.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "ashrdisi3_media_high"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (truncate:SI
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n"))))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) >= 32"
+ "shari %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "ashrdisi3_media_opaque"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (unspec:SI [(match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")]
+ UNSPEC_ASHIFTRT))]
+ "TARGET_SHMEDIA"
+ "shari %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; SImode logical shift right
+
+(define_expand "lshrsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "shift_count_operand" "")))]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_lshrsi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+
+ /* If a dynamic shift is supposed to be used, expand the lshrsi3_d insn
+ here, otherwise the pattern will never match due to the shift amount reg
+ negation. */
+ if (TARGET_DYNSHIFT
+ && CONST_INT_P (operands[2]) && sh_dynamicalize_shift_p (operands[2]))
+ {
+ rtx neg_count = force_reg (SImode,
+ gen_int_mode (- INTVAL (operands[2]), SImode));
+ emit_insn (gen_lshrsi3_d (operands[0], operands[1], neg_count));
+ DONE;
+ }
+
+ if (TARGET_DYNSHIFT && ! CONST_INT_P (operands[2]))
+ {
+ rtx neg_count = gen_reg_rtx (SImode);
+ emit_insn (gen_negsi2 (neg_count, operands[2]));
+ emit_insn (gen_lshrsi3_d (operands[0], operands[1], neg_count));
+ DONE;
+ }
+
+ /* If the lshrsi3_* insn is going to clobber the T_REG it must be
+ expanded here. */
+ if (CONST_INT_P (operands[2])
+ && sh_lshrsi_clobbers_t_reg_p (operands[2])
+ && ! sh_dynamicalize_shift_p (operands[2]))
+ {
+ emit_insn (gen_lshrsi3_n_clobbers_t (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+
+ /* Expand a library call for the dynamic shift. */
+ if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT)
+ {
+ emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
+ rtx funcaddr = gen_reg_rtx (Pmode);
+ function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
+ emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
+ DONE;
+ }
+})
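
A rough C model (an assumption for illustration, not an authoritative description) of the dynamic shift instruction shld, showing why the expander above negates a constant count before using lshrsi3_d: a non-negative count shifts left, a negative count shifts right logically.

    /* Simplified shld model; the Rm == -32, -64, ... edge case, where the
       hardware result is 0, is ignored here.  */
    static unsigned int
    shld_model (unsigned int rn, int rm)
    {
      if (rm >= 0)
        return rn << (rm & 31);
      else
        return rn >> ((-rm) & 31);
    }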
+
+(define_insn "lshrsi3_k"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "p27_rshift_count_operand" "P27")))]
+ "TARGET_SH1"
+ "shlr%O2 %0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "lshrsi3_d"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (neg:SI (match_operand:SI 2 "shift_count_operand" "r"))))]
+ "TARGET_DYNSHIFT"
+ "shld %2,%0"
+ "&& CONST_INT_P (operands[2]) && ! sh_dynamicalize_shift_p (operands[2])
+ && ! sh_lshrsi_clobbers_t_reg_p (operands[2])"
+ [(const_int 0)]
+{
+ if (satisfies_constraint_P27 (operands[2]))
+ {
+ /* This will not be done for a shift amount of 1, because it would
+ clobber the T_REG. */
+ emit_insn (gen_lshrsi3_k (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else if (! satisfies_constraint_P27 (operands[2]))
+ {
+ /* This must happen before reload, otherwise the constant will be moved
+ into a register due to the "r" constraint, after which this split
+ cannot be done anymore.
+ Unfortunately the move insn will not always be eliminated.
+ Also, here we must not create a shift sequence that clobbers the
+ T_REG. */
+ emit_move_insn (operands[0], operands[1]);
+ gen_shifty_op (LSHIFTRT, operands);
+ DONE;
+ }
+
+ FAIL;
+}
+ [(set_attr "type" "dyn_shift")])
+
+;; If dynamic shifts are not available use a library function.
+;; By specifying the pattern we reduce the number of call clobbered regs.
+;; In order to make combine understand the truncation of the shift amount
+;; operand we have to allow it to use pseudo regs for the shift operands.
+(define_insn "lshrsi3_d_call"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ (lshiftrt:SI (reg:SI R4_REG)
+ (and:SI (match_operand:SI 1 "arith_reg_operand" "z")
+ (const_int 31))))
+ (use (match_operand:SI 2 "arith_reg_operand" "r"))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH1 && !TARGET_DYNSHIFT"
+ "jsr @%2%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn_and_split "lshrsi3_n"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "not_p27_rshift_count_operand")))]
+ "TARGET_SH1 && ! sh_lshrsi_clobbers_t_reg_p (operands[2])"
+ "#"
+ "&& (reload_completed
+ || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
+ [(const_int 0)]
+{
+ if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ())
+ {
+ /* If this pattern was picked and dynamic shifts are supported, switch
+ to dynamic shift pattern before reload. */
+ operands[2] = force_reg (SImode,
+ gen_int_mode (- INTVAL (operands[2]), SImode));
+ emit_insn (gen_lshrsi3_d (operands[0], operands[1], operands[2]));
+ }
+ else
+ gen_shifty_op (LSHIFTRT, operands);
+
+ DONE;
+})
+
+;; The lshrsi3_n_clobbers_t pattern also works as a simplified version of
+;; the shlr pattern.
+(define_insn_and_split "lshrsi3_n_clobbers_t"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand:SI 2 "not_p27_rshift_count_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && sh_lshrsi_clobbers_t_reg_p (operands[2])"
+ "#"
+ "&& (reload_completed || INTVAL (operands[2]) == 31
+ || (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ()))"
+ [(const_int 0)]
+{
+ if (INTVAL (operands[2]) == 31)
+ {
+ emit_insn (gen_shll (operands[0], operands[1]));
+ emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
+ }
+ else if (sh_dynamicalize_shift_p (operands[2]) && can_create_pseudo_p ())
+ {
+ /* If this pattern was picked and dynamic shifts are supported, switch
+ to dynamic shift pattern before reload. */
+ operands[2] = force_reg (SImode,
+ gen_int_mode (- INTVAL (operands[2]), SImode));
+ emit_insn (gen_lshrsi3_d (operands[0], operands[1], operands[2]));
+ }
+ else
+ gen_shifty_op (LSHIFTRT, operands);
+
+ DONE;
+})
+
+(define_insn "shlr"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (set (reg:SI T_REG)
+ (and:SI (match_dup 1) (const_int 1)))]
+ "TARGET_SH1"
+ "shlr %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "lshrsi3_media"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (lshiftrt:SI (match_operand:SI 1 "extend_reg_operand" "r,r")
+ (match_operand:SI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA"
+ "@
+ shlrd.l %1, %2, %0
+ shlri.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; DImode logical shift right
+
+(define_expand "lshrdi3"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_operand" "")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (match_operand:DI 2 "immediate_operand" "")))
+ (clobber (reg:SI T_REG))])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_lshrdi3_media (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ if (!CONST_INT_P (operands[2]) || INTVAL (operands[2]) != 1)
+ FAIL;
+})
+
+(define_insn_and_split "lshrdi3_k"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx high = gen_highpart (SImode, operands[0]);
+ rtx low = gen_lowpart (SImode, operands[0]);
+ emit_insn (gen_shlr (high, high));
+ emit_insn (gen_rotcr (low, low, get_t_reg_rtx ()));
+ DONE;
+})
+
+(define_insn "lshrdi3_media"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "shift_count_operand" "r,n")))]
+ "TARGET_SHMEDIA
+ && (arith_reg_dest (operands[0], DImode)
+ || (CONST_INT_P (operands[2]) && INTVAL (operands[2]) > 32))"
+ "@
+ shlrd %1, %2, %0
+ shlri %1, %2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*lshrdisi3_media"
+ [(set (subreg:DI (match_operand:SI 0 "arith_reg_operand" "=r") 0)
+ (lshiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA && INTVAL (operands[2]) < 32"
+ "shlri.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+;; Combined left/right shifts
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")))]
+ "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32"
+ [(use (reg:SI R0_REG))]
+{
+ if (gen_shl_and (operands[0], operands[2], operands[3], operands[1]))
+ FAIL;
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && reload_completed && (unsigned)INTVAL (operands[2]) < 32"
+ [(use (reg:SI R0_REG))]
+{
+ if (gen_shl_and (operands[0], operands[2], operands[3], operands[1]))
+ FAIL;
+ DONE;
+})
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 1"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 6))
+ (const_string "12")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 7))
+ (const_string "14")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 8))
+ (const_string "16")]
+ (const_string "18")))
+ (set_attr "type" "arith")])
+
+(define_insn ""
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && shl_and_kind (operands[2], operands[3], 0) == 2"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_length (insn)") (const_int 4))
+ (const_string "8")]
+ (const_string "10")))
+ (set_attr "type" "arith")])
+
+;; shift left / and combination with a scratch register: The combine pass
+;; does not accept the individual instructions, even though they are
+;; cheap. But it needs a precise description so that it is usable after
+;; reload.
+(define_insn "and_shl_scratch"
+ [(set (match_operand:SI 0 "register_operand" "=r,&r")
+ (lshiftrt:SI
+ (ashift:SI
+ (and:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0")
+ (match_operand:SI 2 "const_int_operand" "N,n"))
+ (match_operand:SI 3 "" "0,r"))
+ (match_operand:SI 4 "const_int_operand" "n,n"))
+ (match_operand:SI 5 "const_int_operand" "n,n")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_and_scr_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_and_scr_length (insn)") (const_int 5))
+ (const_string "10")]
+ (const_string "12")))
+ (set_attr "type" "arith")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (lshiftrt:SI
+ (ashift:SI
+ (and:SI
+ (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "register_operand" ""))
+ (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 5 "const_int_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(use (reg:SI R0_REG))]
+{
+ rtx and_source = operands[rtx_equal_p (operands[0], operands[1]) ? 3 : 1];
+
+ if (INTVAL (operands[2]))
+ {
+ gen_shifty_op (LSHIFTRT, operands);
+ }
+ emit_insn (gen_andsi3 (operands[0], operands[0], and_source));
+ operands[2] = operands[4];
+ gen_shifty_op (ASHIFT, operands);
+ if (INTVAL (operands[5]))
+ {
+ operands[2] = operands[5];
+ gen_shifty_op (LSHIFTRT, operands);
+ }
+ DONE;
+})
+
+;; signed left/right shift combination.
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))
+ (match_operand:SI 3 "const_int_operand" "")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ [(use (reg:SI R0_REG))]
+{
+ if (gen_shl_sext (operands[0], operands[2], operands[3], operands[1]))
+ FAIL;
+ DONE;
+})
+
+(define_insn "shl_sext_ext"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && (unsigned)shl_sext_kind (operands[2], operands[3], 0) - 1 < 5"
+ "#"
+ [(set (attr "length")
+ (cond [(match_test "shl_sext_length (insn)")
+ (const_string "2")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 2))
+ (const_string "4")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+ (const_string "12")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 7))
+ (const_string "14")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 8))
+ (const_string "16")]
+ (const_string "18")))
+ (set_attr "type" "arith")])
+
+(define_insn "shl_sext_sub"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extract:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && (shl_sext_kind (operands[2], operands[3], 0) & ~1) == 6"
+ "#"
+ [(set (attr "length")
+ (cond [(eq (symbol_ref "shl_sext_length (insn)") (const_int 3))
+ (const_string "6")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 4))
+ (const_string "8")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 5))
+ (const_string "10")
+ (eq (symbol_ref "shl_sext_length (insn)") (const_int 6))
+ (const_string "12")]
+ (const_string "14")))
+ (set_attr "type" "arith")])
+
+;; The xtrct_left and xtrct_right patterns are used in expansions of DImode
+;; shifts by 16, and allow the xtrct instruction to be generated from C
+;; source.
+(define_insn "xtrct_left"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 16))
+ (lshiftrt:SI (match_operand:SI 2 "arith_reg_operand" "0")
+ (const_int 16))))]
+ "TARGET_SH1"
+ "xtrct %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "xtrct_right"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 16))
+ (ashift:SI (match_operand:SI 2 "arith_reg_operand" "r")
+ (const_int 16))))]
+ "TARGET_SH1"
+ "xtrct %2,%0"
+ [(set_attr "type" "arith")])
+
+;; -------------------------------------------------------------------------
+;; Unary arithmetic
+;; -------------------------------------------------------------------------
+
+(define_insn "negc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (neg:SI (plus:SI (reg:SI T_REG)
+ (match_operand:SI 1 "arith_reg_operand" "r"))))
+ (set (reg:SI T_REG)
+ (ne:SI (ior:SI (reg:SI T_REG) (match_dup 1))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "negc %1,%0"
+ [(set_attr "type" "arith")])
+
+;; A simplified version of the negc insn, where the exact value of the
+;; T bit doesn't matter. This is easier for combine to pick up.
+;; Notice that '0 - x - 1' is the same as '~x', thus we don't specify
+;; extra patterns for this case.
+(define_insn "*negc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand:SI 2 "t_reg_operand" "")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "negc %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn "*negdi_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (neg:DI (match_operand:DI 1 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "sub r63, %1, %0"
+ [(set_attr "type" "arith_media")])
+
+;; Don't split into individual negc insns immediately so that neg:DI (abs:DI)
+;; can be combined.
+(define_expand "negdi2"
+ [(parallel [(set (match_operand:DI 0 "arith_reg_dest")
+ (neg:DI (match_operand:DI 1 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH1")
+
+(define_insn_and_split "*negdi2"
+ [(set (match_operand:DI 0 "arith_reg_dest")
+ (neg:DI (match_operand:DI 1 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ emit_insn (gen_clrt ());
+ emit_insn (gen_negc (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1])));
+ emit_insn (gen_negc (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1])));
+ DONE;
+})
+
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "neg %1,%0"
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "one_cmplsi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (not:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "not %1,%0"
+ "&& can_create_pseudo_p ()"
+ [(set (reg:SI T_REG) (ge:SI (match_dup 1) (const_int 0)))
+ (set (match_dup 0) (reg:SI T_REG))]
+{
+/* PR 54685
+ If the result of 'unsigned int <= 0x7FFFFFFF' ends up as the following
+ sequence:
+
+  (set (reg0) (not:SI (reg1)))
+ (parallel [(set (reg2) (lshiftrt:SI (reg0) (const_int 31)))
+ (clobber (reg:SI T_REG))])
+
+ ... match and combine the sequence manually in the split pass after the
+ combine pass. Notice that combine does try the target pattern of this
+ split, but if the pattern is added it interferes with other patterns, in
+ particular with the div0s comparisons.
+ This could also be done with a peephole but doing it here before register
+ allocation can save one temporary.
+ When we're here, the not:SI pattern obviously has been matched already
+ and we only have to see whether the following insn is the left shift. */
+
+ rtx i = next_nonnote_insn_bb (curr_insn);
+ if (i == NULL_RTX || !NONJUMP_INSN_P (i))
+ FAIL;
+
+ rtx p = PATTERN (i);
+ if (GET_CODE (p) != PARALLEL || XVECLEN (p, 0) != 2)
+ FAIL;
+
+ rtx p0 = XVECEXP (p, 0, 0);
+ rtx p1 = XVECEXP (p, 0, 1);
+
+ if (/* (set (reg2) (lshiftrt:SI (reg0) (const_int 31))) */
+ GET_CODE (p0) == SET
+ && GET_CODE (XEXP (p0, 1)) == LSHIFTRT
+ && REG_P (XEXP (XEXP (p0, 1), 0))
+ && REGNO (XEXP (XEXP (p0, 1), 0)) == REGNO (operands[0])
+ && CONST_INT_P (XEXP (XEXP (p0, 1), 1))
+ && INTVAL (XEXP (XEXP (p0, 1), 1)) == 31
+
+ /* (clobber (reg:SI T_REG)) */
+ && GET_CODE (p1) == CLOBBER && REG_P (XEXP (p1, 0))
+ && REGNO (XEXP (p1, 0)) == T_REG)
+ {
+ operands[0] = XEXP (p0, 0);
+ set_insn_deleted (i);
+ }
+ else
+ FAIL;
+}
+ [(set_attr "type" "arith")])
+
+(define_expand "one_cmpldi2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (xor:DI (match_operand:DI 1 "arith_reg_operand" "")
+ (const_int -1)))]
+ "TARGET_SHMEDIA" "")
+
+(define_expand "abs<mode>2"
+ [(parallel [(set (match_operand:SIDI 0 "arith_reg_dest")
+ (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH1")
+
+(define_insn_and_split "*abs<mode>2"
+ [(set (match_operand:SIDI 0 "arith_reg_dest")
+ (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ if (<MODE>mode == SImode)
+ emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+ else
+ {
+ rtx high_src = gen_highpart (SImode, operands[1]);
+ emit_insn (gen_cmpgesi_t (high_src, const0_rtx));
+ }
+
+ emit_insn (gen_neg<mode>_cond (operands[0], operands[1], operands[1],
+ const1_rtx));
+ DONE;
+})
+
+(define_insn_and_split "*negabs<mode>2"
+ [(set (match_operand:SIDI 0 "arith_reg_dest")
+ (neg:SIDI (abs:SIDI (match_operand:SIDI 1 "arith_reg_operand"))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ if (<MODE>mode == SImode)
+ emit_insn (gen_cmpgesi_t (operands[1], const0_rtx));
+ else
+ {
+ rtx high_src = gen_highpart (SImode, operands[1]);
+ emit_insn (gen_cmpgesi_t (high_src, const0_rtx));
+ }
+
+ emit_insn (gen_neg<mode>_cond (operands[0], operands[1], operands[1],
+ const0_rtx));
+ DONE;
+})
+
+;; The SH4 202 can do zero-offset branches without pipeline stalls.
+;; This can be used as a form of conditional execution, which is useful
+;; for abs.
+;; Ideally, instruction scheduling should decide whether to use a
+;; zero-offset branch or not for any generic case involving a single
+;; instruction on SH4 202.
+(define_insn_and_split "negsi_cond"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (if_then_else
+ (eq:SI (reg:SI T_REG) (match_operand:SI 3 "const_int_operand" "M,N"))
+ (match_operand:SI 1 "arith_reg_operand" "0,0")
+ (neg:SI (match_operand:SI 2 "arith_reg_operand" "r,r"))))]
+ "TARGET_SH1 && TARGET_ZDCBRANCH"
+{
+ static const char* alt[] =
+ {
+ "bt 0f" "\n"
+ " neg %2,%0" "\n"
+ "0:",
+
+ "bf 0f" "\n"
+ " neg %2,%0" "\n"
+ "0:"
+ };
+ return alt[which_alternative];
+}
+ "TARGET_SH1 && ! TARGET_ZDCBRANCH"
+ [(const_int 0)]
+{
+ rtx skip_neg_label = gen_label_rtx ();
+
+ emit_move_insn (operands[0], operands[1]);
+
+ emit_jump_insn (INTVAL (operands[3])
+ ? gen_branch_true (skip_neg_label)
+ : gen_branch_false (skip_neg_label));
+
+ emit_label_after (skip_neg_label,
+ emit_insn (gen_negsi2 (operands[0], operands[1])));
+ DONE;
+}
+ [(set_attr "type" "arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_insn_and_split "negdi_cond"
+ [(set (match_operand:DI 0 "arith_reg_dest")
+ (if_then_else
+ (eq:SI (reg:SI T_REG) (match_operand:SI 3 "const_int_operand"))
+ (match_operand:DI 1 "arith_reg_operand")
+ (neg:DI (match_operand:DI 2 "arith_reg_operand"))))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx skip_neg_label = gen_label_rtx ();
+
+ emit_move_insn (operands[0], operands[1]);
+
+ emit_jump_insn (INTVAL (operands[3])
+ ? gen_branch_true (skip_neg_label)
+ : gen_branch_false (skip_neg_label));
+
+ if (!INTVAL (operands[3]))
+ emit_insn (gen_clrt ());
+
+ emit_insn (gen_negc (gen_lowpart (SImode, operands[0]),
+ gen_lowpart (SImode, operands[1])));
+ emit_label_after (skip_neg_label,
+ emit_insn (gen_negc (gen_highpart (SImode, operands[0]),
+ gen_highpart (SImode, operands[1]))));
+ DONE;
+})
+
+(define_expand "bswapsi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (bswap:SI (match_operand:SI 1 "arith_reg_operand" "")))]
+ "TARGET_SH1"
+{
+ if (! can_create_pseudo_p ())
+ FAIL;
+ else
+ {
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_swapbsi2 (tmp0, operands[1]));
+ emit_insn (gen_rotlsi3_16 (tmp1, tmp0));
+ emit_insn (gen_swapbsi2 (operands[0], tmp1));
+ DONE;
+ }
+})
+
+(define_insn "swapbsi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 4294901760))
+ (ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8))
+ (const_int 65280))
+ (and:SI (ashiftrt:SI (match_dup 1) (const_int 8))
+ (const_int 255)))))]
+ "TARGET_SH1"
+ "swap.b %1,%0"
+ [(set_attr "type" "arith")])
+
+;; The *swapbisi2_and_shl8 pattern helps the combine pass simplifying
+;; partial byte swap expressions such as...
+;; ((x & 0xFF) << 8) | ((x >> 8) & 0xFF).
+;; ...which are currently not handled by the tree optimizers.
+;; The combine pass will not initially try to combine the full expression,
+;; but only some sub-expressions. In such a case the *swapbisi2_and_shl8
+;; pattern acts as an intermediate pattern that will eventually lead combine
+;; to the swapbsi2 pattern above.
+;; As a side effect this also improves code that does (x & 0xFF) << 8
+;; or (x << 8) & 0xFF00.
+(define_insn_and_split "*swapbisi2_and_shl8"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 8))
+ (const_int 65280))
+ (match_operand:SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SH1 && ! reload_in_progress && ! reload_completed"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendqisi2 (tmp0, gen_lowpart (QImode, operands[1])));
+ emit_insn (gen_swapbsi2 (tmp1, tmp0));
+ emit_insn (gen_iorsi3 (operands[0], tmp1, operands[2]));
+ DONE;
+})
+
+;; The *swapbhisi2 pattern is, like the *swapbisi2_and_shl8 pattern, another
+;; intermediate pattern that will help the combine pass arrive at swapbsi2.
+(define_insn_and_split "*swapbhisi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "r")
+ (const_int 8))
+ (const_int 65280))
+ (zero_extract:SI (match_dup 1) (const_int 8) (const_int 8))))]
+ "TARGET_SH1 && ! reload_in_progress && ! reload_completed"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx tmp = gen_reg_rtx (SImode);
+
+ emit_insn (gen_zero_extendhisi2 (tmp, gen_lowpart (HImode, operands[1])));
+ emit_insn (gen_swapbsi2 (operands[0], tmp));
+ DONE;
+})
+
+;; In some cases the swapbsi2 pattern might leave a sequence such as...
+;; swap.b r4,r4
+;; mov r4,r0
+;;
+;; which can be simplified to...
+;; swap.b r4,r0
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (const_int 4294901760))
+ (ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8))
+ (const_int 65280))
+ (and:SI (ashiftrt:SI (match_dup 1) (const_int 8))
+ (const_int 255)))))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (match_dup 0))]
+ "TARGET_SH1 && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 2)
+ (ior:SI (and:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (const_int 4294901760))
+ (ior:SI (and:SI (ashift:SI (match_dup 1) (const_int 8))
+ (const_int 65280))
+ (and:SI (ashiftrt:SI (match_dup 1) (const_int 8))
+ (const_int 255)))))])
+
+;; -------------------------------------------------------------------------
+;; Zero extension instructions
+;; -------------------------------------------------------------------------
+
+(define_insn "zero_extendsidi2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extend:DI (match_operand:SI 1 "extend_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "addz.l %1, r63, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "extend")])
+
+(define_insn "zero_extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.uw %m1, %0"
+ [(set_attr "type" "*,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (zero_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48)))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
+{
+ if (GET_CODE (operands[1]) == TRUNCATE)
+ operands[1] = XEXP (operands[1], 0);
+})
+
+;; ??? when a truncated input to a zero_extend is reloaded, reload will
+;; reload the entire truncate expression.
+(define_insn_and_split "*loaddi_trunc"
+ [(set (match_operand 0 "any_register_operand" "=r")
+ (truncate (match_operand:DI 1 "memory_operand" "m")))]
+ "TARGET_SHMEDIA && reload_completed"
+ "#"
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+})
+
+(define_insn "zero_extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (zero_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ andi %1, 255, %0
+ ld%M1.ub %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (zero_extend:SI (match_operand:QIHI 1 "zero_extend_operand")))])
+
+(define_insn_and_split "*zero_extend<mode>si2_compact"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extend:SI (match_operand:QIHI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "extu.<bw> %1,%0"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 0) (match_dup 2))]
+{
+ /* Sometimes combine fails to combine a T bit or negated T bit store to a
+ reg with a following zero extension. In the split pass after combine,
+ try to figure out how the extended reg was set. If it originated from
+ the T bit we can replace the zero extension with a reg move, which will
+ be eliminated. Notice that this also helps the *cbranch_t splitter when
+ it tries to post-combine tests and conditional branches, as it does not
+ check for zero extensions. */
+ operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn);
+ if (operands[2] == NULL_RTX)
+ FAIL;
+}
+ [(set_attr "type" "arith")])
+
+(define_insn "*zero_extendhisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.uw %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 16)))]
+{
+ rtx op1 = operands[1];
+
+ if (GET_CODE (op1) == TRUNCATE)
+ op1 = XEXP (op1, 0);
+ operands[2]
+ = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+ subreg_lowpart_offset (SImode, GET_MODE (op1)));
+})
+
+(define_insn "*zero_extendqisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (zero_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ andi %1, 255, %0
+ ld%M1.ub %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "extu.b %1,%0"
+ [(set_attr "type" "arith")])
+
+;; SH2A supports two zero extending load instructions: movu.b and movu.w.
+;; They could also be used for simple memory addresses like @Rn by setting
+;; the displacement value to zero. However, doing so too early results in
+;; missed opportunities for other optimizations such as post-inc or index
+;; addressing loads.
+;; Although the 'zero_extend_movu_operand' predicate does not allow simple
+;; register addresses (an address without a displacement, index, post-inc),
+;; zero-displacement addresses might be generated during reload, which are
+;; then simplified to simple register addresses.  Thus, we have to
+;; provide the Sdd and Sra alternatives in the patterns.
+(define_insn "*zero_extend<mode>si2_disp_mem"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,r")
+ (zero_extend:SI
+ (match_operand:QIHI 1 "zero_extend_movu_operand" "Sdd,Sra")))]
+ "TARGET_SH2A"
+ "@
+ movu.<bw> %1,%0
+ movu.<bw> @(0,%t1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "4")])
+
+;; Convert the zero extending loads in sequences such as:
+;; movu.b @(1,r5),r0 movu.w @(2,r5),r0
+;; mov.b r0,@(1,r4) mov.b r0,@(1,r4)
+;;
+;; back to sign extending loads like:
+;; mov.b @(1,r5),r0 mov.w @(2,r5),r0
+;; mov.b r0,@(1,r4) mov.b r0,@(1,r4)
+;;
+;; if the extension type is irrelevant. The sign extending mov.{b|w} insn
+;; is only 2 bytes in size if the displacement is {K04|K05}.
+;; If the displacement is greater it doesn't matter, so we convert anyway.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (zero_extend:SI (match_operand 1 "displacement_mem_operand" "")))
+ (set (match_operand 2 "nonimmediate_operand" "")
+ (match_operand 3 "arith_reg_operand" ""))]
+ "TARGET_SH2A
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && peep2_reg_dead_p (2, operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[2]))
+ <= GET_MODE_SIZE (GET_MODE (operands[1]))"
+ [(set (match_dup 0) (sign_extend:SI (match_dup 1)))
+ (set (match_dup 2) (match_dup 3))])
+
+;; Fold sequences such as
+;; mov.b @r3,r7
+;; extu.b r7,r7
+;; into
+;; movu.b @(0,r3),r7
+;; This does not reduce the code size but the number of instructions is
+;; halved, which results in faster code.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (sign_extend:SI (match_operand 1 "simple_mem_operand" "")))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (zero_extend:SI (match_operand 3 "arith_reg_operand" "")))]
+ "TARGET_SH2A
+ && GET_MODE (operands[1]) == GET_MODE (operands[3])
+ && (GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && (REGNO (operands[2]) == REGNO (operands[0])
+ || peep2_reg_dead_p (2, operands[0]))"
+ [(set (match_dup 2) (zero_extend:SI (match_dup 4)))]
+{
+ operands[4]
+ = replace_equiv_address (operands[1],
+ gen_rtx_PLUS (SImode, XEXP (operands[1], 0),
+ const0_rtx));
+})
+
+;; -------------------------------------------------------------------------
+;; Sign extension instructions
+;; -------------------------------------------------------------------------
+
+;; ??? This should be a define expand.
+;; ??? Or perhaps it should be dropped?
+
+;; convert_move generates good code for SH[1-4].
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,?f")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, r63, %0
+ ld%M1.l %m1, %0
+ fmov.sl %1, %0"
+ [(set_attr "type" "arith_media,load_media,fpconv_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "extend")))])
+
+(define_insn "extendhidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.w %m1, %0"
+ [(set_attr "type" "*,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 48)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
+{
+ if (GET_CODE (operands[1]) == TRUNCATE)
+ operands[1] = XEXP (operands[1], 0);
+})
+
+(define_insn "extendqidi2"
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (sign_extend:DI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.b %m1, %0"
+ [(set_attr "type" "*,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand" "")
+ (sign_extend:DI (match_operand:QI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:DI (subreg:DI (match_dup 1) 0) (const_int 56)))
+ (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
+{
+ if (GET_CODE (operands[1]) == TRUNCATE)
+ operands[1] = XEXP (operands[1], 0);
+})
+
+(define_expand "extend<mode>si2"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))])
+
+(define_insn "*extendhisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:HI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.w %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 16)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 16)))]
+{
+ rtx op1 = operands[1];
+ if (GET_CODE (op1) == TRUNCATE)
+ op1 = XEXP (op1, 0);
+ operands[2]
+ = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+ subreg_lowpart_offset (SImode, GET_MODE (op1)));
+})
+
+(define_insn_and_split "*extend<mode>si2_compact_reg"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (sign_extend:SI (match_operand:QIHI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "exts.<bw> %1,%0"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 0) (match_dup 2))]
+{
+  /* Sometimes combine fails to combine a T bit or negated T bit store to a
+     reg with a following sign extension.  In the split pass after combine,
+     try to figure out how the extended reg was set.  If it originated from
+     the T bit we can replace the sign extension with a reg move, which will
+     be eliminated.  */
+ operands[2] = sh_try_omit_signzero_extend (operands[1], curr_insn);
+ if (operands[2] == NULL_RTX)
+ FAIL;
+}
+ [(set_attr "type" "arith")])
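+
+;; As a hypothetical illustration of the split above: if combine leaves a
+;; T bit store followed by a sign extension, e.g.
+;;	movt	r1
+;;	exts.b	r1,r1
+;; the value in r1 is already 0 or 1, so the exts.b can be replaced by a
+;; plain reg-reg copy, which later passes remove.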
+
+;; FIXME: Fold non-SH2A and SH2A alternatives with "enabled" attribute.
+;; See movqi insns.
+(define_insn "*extend<mode>si2_compact_mem_disp"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r")
+ (sign_extend:SI
+ (mem:QIHI
+ (plus:SI
+ (match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "const_int_operand" "<disp04>,N")))))]
+ "TARGET_SH1 && ! TARGET_SH2A
+ && sh_legitimate_index_p (<MODE>mode, operands[2], false, true)"
+ "@
+ mov.<bw> @(%O2,%1),%0
+ mov.<bw> @%1,%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*extend<mode>si2_compact_mem_disp"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z,r,r")
+ (sign_extend:SI
+ (mem:QIHI
+ (plus:SI
+ (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
+ (match_operand:SI 2 "const_int_operand" "<disp04>,N,<disp12>")))))]
+ "TARGET_SH2A && sh_legitimate_index_p (<MODE>mode, operands[2], true, true)"
+ "@
+ mov.<bw> @(%O2,%1),%0
+ mov.<bw> @%1,%0
+ mov.<bw> @(%O2,%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "2,2,4")])
+
+;; The *_snd patterns will take care of QImode/HImode addressing modes
+;; other than displacement addressing.  They must be defined _after_ the
+;; displacement addressing patterns. Otherwise the displacement addressing
+;; patterns will not be picked.
+(define_insn "*extend<mode>si2_compact_snd"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (sign_extend:SI
+ (match_operand:QIHI 1 "movsrc_no_disp_mem_operand" "Snd")))]
+ "TARGET_SH1"
+ "mov.<bw> %1,%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*extendqisi2_media"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "general_extend_operand" "r,m")))]
+ "TARGET_SHMEDIA"
+ "@
+ #
+ ld%M1.b %m1, %0"
+ [(set_attr "type" "arith_media,load_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "extend_reg_operand" "")))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (ashift:SI (match_dup 2) (const_int 24)))
+ (set (match_dup 0) (ashiftrt:SI (match_dup 0) (const_int 24)))]
+{
+ rtx op1 = operands[1];
+ if (GET_CODE (op1) == TRUNCATE)
+ op1 = XEXP (op1, 0);
+ operands[2]
+ = simplify_gen_subreg (SImode, op1, GET_MODE (op1),
+ subreg_lowpart_offset (SImode, GET_MODE (op1)));
+})
+
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "arith_reg_dest" "")
+ (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "")))]
+ ""
+ "")
+
+(define_insn "*extendqihi2_compact_reg"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (sign_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
+ "TARGET_SH1"
+ "exts.b %1,%0"
+ [(set_attr "type" "arith")])
+
+;; It would seem useful to combine the truncXi patterns into the movXi
+;; patterns, but unary operators are ignored when matching constraints,
+;; so we need separate patterns.
+(define_insn "truncdisi2"
+ [(set (match_operand:SI 0 "general_movdst_operand" "=r,m,m,f,r,f")
+ (truncate:SI (match_operand:DI 1 "register_operand" "r,r,f,r,f,f")))]
+ "TARGET_SHMEDIA"
+ "@
+ add.l %1, r63, %0
+ st%M0.l %m0, %1
+ fst%M0.s %m0, %T1
+ fmov.ls %1, %0
+ fmov.sl %T1, %0
+ fmov.s %T1, %0"
+ [(set_attr "type" "arith_media,store_media,fstore_media,fload_media,
+ fpconv_media,fmove_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "extend")))])
+
+(define_insn "truncdihi2"
+ [(set (match_operand:HI 0 "general_movdst_operand" "=?r,m")
+ (truncate:HI (match_operand:DI 1 "register_operand" "r,r")))]
+ "TARGET_SHMEDIA"
+{
+ static const char* alt[] =
+ {
+ "shlli %1,48,%0" "\n"
+ " shlri %0,48,%0",
+
+ "st%M0.w %m0, %1"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "type" "arith_media,store_media")
+ (set_attr "length" "8,4")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "extend")))])
+
+; N.B. This should agree with LOAD_EXTEND_OP and movqi.
+; Because we use zero extension, we can't provide signed QImode compares
+; using a simple compare or conditional branch insn.
+(define_insn "truncdiqi2"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,m")
+ (truncate:QI (match_operand:DI 1 "register_operand" "r,r")))]
+ "TARGET_SHMEDIA"
+ "@
+ andi %1, 255, %0
+ st%M0.b %m0, %1"
+ [(set_attr "type" "arith_media,store")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "extend")))])
+
+;; -------------------------------------------------------------------------
+;; Move instructions
+;; -------------------------------------------------------------------------
+
+;; define push and pop so it is easy for sh.c
+;; We can't use push and pop on SHcompact because the stack must always
+;; be 8-byte aligned.
+(define_expand "push"
+ [(set (mem:SI (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SI 0 "register_operand" "r,l,x"))]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "pop"
+ [(set (match_operand:SI 0 "register_operand" "=r,l,x")
+ (mem:SI (post_inc:SI (reg:SI SP_REG))))]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "push_e"
+ [(parallel [(set (mem:SF (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:SF 0 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_insn "push_fpul"
+ [(set (mem:SF (pre_dec:SI (reg:SI SP_REG))) (reg:SF FPUL_REG))]
+ "TARGET_SH2E && ! TARGET_SH5"
+ "sts.l fpul,@-r15"
+ [(set_attr "type" "fstore")
+ (set_attr "late_fp_use" "yes")
+ (set_attr "hit_stack" "yes")])
+
+;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4,
+;; so use that.
+(define_expand "push_4"
+ [(parallel [(set (mem:DF (pre_dec:SI (reg:SI SP_REG)))
+ (match_operand:DF 0 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "pop_e"
+ [(parallel [(set (match_operand:SF 0 "" "")
+ (mem:SF (post_inc:SI (reg:SI SP_REG))))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_insn "pop_fpul"
+ [(set (reg:SF FPUL_REG) (mem:SF (post_inc:SI (reg:SI SP_REG))))]
+ "TARGET_SH2E && ! TARGET_SH5"
+ "lds.l @r15+,fpul"
+ [(set_attr "type" "load")
+ (set_attr "hit_stack" "yes")])
+
+(define_expand "pop_4"
+ [(parallel [(set (match_operand:DF 0 "" "")
+ (mem:DF (post_inc:SI (reg:SI SP_REG))))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (scratch:SI))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+ "")
+
+(define_expand "push_fpscr"
+ [(const_int 0)]
+ "TARGET_SH2E"
+{
+ rtx insn = emit_insn (gen_fpu_switch (gen_frame_mem (PSImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ get_fpscr_rtx ()));
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+})
+
+(define_expand "pop_fpscr"
+ [(const_int 0)]
+ "TARGET_SH2E"
+{
+ rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (),
+ gen_frame_mem (PSImode,
+ gen_rtx_POST_INC (Pmode,
+ stack_pointer_rtx))));
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+})
+
+;; The clrt and sett patterns can happen as the result of optimization and
+;; insn expansion.
+;; Comparisons might get simplified to a move of zero or 1 into the T reg.
+;; In this case they might not disappear completely, because the T reg is
+;; a fixed hard reg.
+;; When DImode operations that use the T reg as carry/borrow are split into
+;; individual SImode operations, the T reg is usually cleared before the
+;; first SImode insn.
+(define_insn "clrt"
+ [(set (reg:SI T_REG) (const_int 0))]
+ "TARGET_SH1"
+ "clrt"
+ [(set_attr "type" "mt_group")])
+
+(define_insn "sett"
+ [(set (reg:SI T_REG) (const_int 1))]
+ "TARGET_SH1"
+ "sett"
+ [(set_attr "type" "mt_group")])
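+
+;; As a sketch of the DImode case mentioned above, a 64-bit addition is
+;; usually emitted as something like
+;;	clrt
+;;	addc	<low src>,<low dst>
+;;	addc	<high src>,<high dst>
+;; where the clrt clears the carry (T bit) before the first addc.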
+
+;; Use the combine pass to transform sequences such as
+;; mov r5,r0
+;; add #1,r0
+;; shll2 r0
+;; mov.l @(r0,r4),r0
+;; into
+;; shll2 r5
+;; add r4,r5
+;; mov.l @(4,r5),r0
+;;
+;; See also PR 39423.
+;; Notice that these patterns have a T_REG clobber, because the shift
+;; sequence that will be split out might clobber the T_REG. Ideally, the
+;; clobber would be added conditionally, depending on the result of
+;; sh_ashlsi_clobbers_t_reg_p. When splitting out the shifts we must go
+;; through the ashlsi3 expander in order to get the right shift insn --
+;; a T_REG clobbering or non-clobbering shift sequence or dynamic shift.
+;; FIXME: Combine never tries this kind of patterns for DImode.
+(define_insn_and_split "*movsi_index_disp_load"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (match_operand:SI 1 "mem_index_disp_operand" "m"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 6) (plus:SI (match_dup 5) (match_dup 3)))
+ (set (match_dup 0) (match_dup 7))]
+{
+ rtx mem = operands[1];
+ rtx plus0_rtx = XEXP (mem, 0);
+ rtx plus1_rtx = XEXP (plus0_rtx, 0);
+ rtx mult_rtx = XEXP (plus1_rtx, 0);
+
+ operands[1] = XEXP (mult_rtx, 0);
+ operands[2] = GEN_INT (exact_log2 (INTVAL (XEXP (mult_rtx, 1))));
+ operands[3] = XEXP (plus1_rtx, 1);
+ operands[4] = XEXP (plus0_rtx, 1);
+ operands[5] = gen_reg_rtx (SImode);
+ operands[6] = gen_reg_rtx (SImode);
+ operands[7] =
+ replace_equiv_address (mem,
+ gen_rtx_PLUS (SImode, operands[6], operands[4]));
+
+ emit_insn (gen_ashlsi3 (operands[5], operands[1], operands[2]));
+})
+
+(define_insn_and_split "*movhi_index_disp_load"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (SZ_EXTEND:SI (match_operand:HI 1 "mem_index_disp_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx mem = operands[1];
+ rtx plus0_rtx = XEXP (mem, 0);
+ rtx plus1_rtx = XEXP (plus0_rtx, 0);
+ rtx mult_rtx = XEXP (plus1_rtx, 0);
+
+ rtx op_1 = XEXP (mult_rtx, 0);
+ rtx op_2 = GEN_INT (exact_log2 (INTVAL (XEXP (mult_rtx, 1))));
+ rtx op_3 = XEXP (plus1_rtx, 1);
+ rtx op_4 = XEXP (plus0_rtx, 1);
+ rtx op_5 = gen_reg_rtx (SImode);
+ rtx op_6 = gen_reg_rtx (SImode);
+ rtx op_7 = replace_equiv_address (mem, gen_rtx_PLUS (SImode, op_6, op_4));
+
+ emit_insn (gen_ashlsi3 (op_5, op_1, op_2));
+ emit_insn (gen_addsi3 (op_6, op_5, op_3));
+
+ if (<CODE> == SIGN_EXTEND)
+ {
+ emit_insn (gen_extendhisi2 (operands[0], op_7));
+ DONE;
+ }
+ else if (<CODE> == ZERO_EXTEND)
+ {
+ /* On SH2A the movu.w insn can be used for zero extending loads. */
+ if (TARGET_SH2A)
+ emit_insn (gen_zero_extendhisi2 (operands[0], op_7));
+ else
+ {
+ emit_insn (gen_extendhisi2 (operands[0], op_7));
+ emit_insn (gen_zero_extendhisi2 (operands[0],
+ gen_lowpart (HImode, operands[0])));
+ }
+ DONE;
+ }
+ else
+ FAIL;
+})
+
+(define_insn_and_split "*mov<mode>_index_disp_store"
+ [(set (match_operand:HISI 0 "mem_index_disp_operand" "=m")
+ (match_operand:HISI 1 "arith_reg_operand" "r"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 6) (plus:SI (match_dup 5) (match_dup 3)))
+ (set (match_dup 7) (match_dup 1))]
+{
+ rtx mem = operands[0];
+ rtx plus0_rtx = XEXP (mem, 0);
+ rtx plus1_rtx = XEXP (plus0_rtx, 0);
+ rtx mult_rtx = XEXP (plus1_rtx, 0);
+
+ operands[0] = XEXP (mult_rtx, 0);
+ operands[2] = GEN_INT (exact_log2 (INTVAL (XEXP (mult_rtx, 1))));
+ operands[3] = XEXP (plus1_rtx, 1);
+ operands[4] = XEXP (plus0_rtx, 1);
+ operands[5] = gen_reg_rtx (SImode);
+ operands[6] = gen_reg_rtx (SImode);
+ operands[7] =
+ replace_equiv_address (mem,
+ gen_rtx_PLUS (SImode, operands[6], operands[4]));
+
+ emit_insn (gen_ashlsi3 (operands[5], operands[0], operands[2]));
+})
+
+;; t/r must come after r/r, lest reload try to reload stuff like
+;; (set (subreg:SI (mem:QI (plus:SI (reg:SI SP_REG) (const_int 12)) 0) 0)
+;; (made from (set (subreg:SI (reg:QI ###) 0) ) into T.
+(define_insn "movsi_i"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,r,r,m,<,<,x,l,x,l,r")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "Q,r,I08,mr,x,l,r,x,l,r,r,>,>,i"))]
+ "TARGET_SH1
+ && ! TARGET_SH2E
+ && ! TARGET_SH2A
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ sts %1,%0
+ mov.l %1,%0
+ sts.l %1,%0
+ sts.l %1,%0
+ lds %1,%0
+ lds %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload_si,move,movi8,load_si,mac_gp,prget,store,mac_mem,
+ pstore,gp_mac,prset,mem_mac,pload,pcload_si")
+ (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+
+;; t/r must come after r/r, lest reload try to reload stuff like
+;; (subreg:SI (reg:SF FR14_REG) 0) into T (compiling stdlib/strtod.c -m3e -O2)
+;; ??? This allows moves from macl to fpul to be recognized, but these moves
+;; will require a reload.
+;; ??? We can't include f/f because we need the proper FPSCR setting when
+;; TARGET_FMOVD is in effect, and mode switching is done before reload.
+(define_insn "movsi_ie"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,r,r,r,r,m,<,<,x,l,x,l,y,<,r,y,r,*f,y,*f,y")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "Q,r,I08,I20,I28,mr,x,l,r,x,l,r,r,>,>,>,y,i,r,y,y,*f,*f,y"))]
+ "(TARGET_SH2E || TARGET_SH2A)
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov %1,%0
+ movi20 %1,%0
+ movi20s %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ sts %1,%0
+ mov.l %1,%0
+ sts.l %1,%0
+ sts.l %1,%0
+ lds %1,%0
+ lds %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ lds.l %1,%0
+ sts.l %1,%0
+ fake %1,%0
+ lds %1,%0
+ sts %1,%0
+ fsts fpul,%0
+ flds %1,fpul
+ fmov %1,%0
+ ! move optimized away"
+ [(set_attr "type" "pcload_si,move,movi8,move,move,load_si,mac_gp,prget,store,
+ mac_mem,pstore,gp_mac,prset,mem_mac,pload,load,fstore,
+ pcload_si,gp_fpul,fpul_gp,fmove,fmove,fmove,nil")
+ (set_attr "late_fp_use" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes,*,*,yes,*,*,*,*")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 4)
+ (if_then_else
+ (match_test "TARGET_SH2A")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (if_then_else
+ (match_test "TARGET_SH2A")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 0)])])
+
+(define_insn "movsi_i_lowpart"
+ [(set (strict_low_part
+ (match_operand:SI 0 "general_movdst_operand" "+r,r,r,r,r,r,m,r"))
+ (match_operand:SI 1 "general_movsrc_operand" "Q,r,I08,mr,x,l,r,i"))]
+ "TARGET_SH1
+ && (register_operand (operands[0], SImode)
+ || register_operand (operands[1], SImode))"
+ "@
+ mov.l %1,%0
+ mov %1,%0
+ mov %1,%0
+ mov.l %1,%0
+ sts %1,%0
+ sts %1,%0
+ mov.l %1,%0
+ fake %1,%0"
+ [(set_attr "type" "pcload,move,arith,load,mac_gp,prget,store,pcload")])
+
+(define_insn_and_split "load_ra"
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")] UNSPEC_RA))]
+ "TARGET_SH1"
+ "#"
+ "&& ! currently_expanding_to_rtl"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ if (TARGET_SHCOMPACT && crtl->saves_all_registers)
+ operands[1] = gen_frame_mem (SImode, return_address_pointer_rtx);
+})
+
+;; The '?'s in the following constraints may not reflect the time taken
+;; to perform the move. They are there to discourage the use of floating-
+;; point registers for storing integer values.
+(define_insn "*movsi_media"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,m,f?,m,f?,r,f?,*b,r,b")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "r,I16Css,nCpg,m,rZ,m,f?,rZ,f?,f?,r,*b,Csy"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], SImode)
+ || sh_register_operand (operands[1], SImode)
+ || GET_CODE (operands[1]) == TRUNCATE)"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1
+ fld%M1.s %m1, %0
+ fst%M0.s %m0, %1
+ fmov.ls %N1, %0
+ fmov.sl %1, %0
+ fmov.s %1, %0
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,
+ fload_media,fstore_media,fload_media,fpconv_media,
+ fmove_media,ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,8,4,4,4,4,4,4,4,4,4,12")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_insn "*movsi_media_nofpu"
+ [(set (match_operand:SI 0 "general_movdst_operand"
+ "=r,r,r,r,m,*b,r,*b")
+ (match_operand:SI 1 "general_movsrc_operand"
+ "r,I16Css,nCpg,m,rZ,r,*b,Csy"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], SImode)
+ || sh_register_operand (operands[1], SImode)
+ || GET_CODE (operands[1]) == TRUNCATE)"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,
+ ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,8,4,4,4,4,12")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "movsi_const"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 16)] UNSPEC_EXTRACT_S16)))
+ (set (match_dup 0)
+ (ior:SI (ashift:SI (match_dup 0) (const_int 16))
+ (const:SI (unspec:SI [(match_dup 1)
+ (const_int 0)] UNSPEC_EXTRACT_U16))))]
+ "TARGET_SHMEDIA && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+{
+ if (GET_CODE (operands[1]) == LABEL_REF
+ && GET_CODE (XEXP (operands[1], 0)) == CODE_LABEL)
+ LABEL_NUSES (XEXP (operands[1], 0)) += 2;
+ else if (GOTOFF_P (operands[1]))
+ {
+ rtx unspec = XEXP (operands[1], 0);
+
+ if (! UNSPEC_GOTOFF_P (unspec))
+ {
+ unspec = XEXP (unspec, 0);
+ if (! UNSPEC_GOTOFF_P (unspec))
+ abort ();
+ }
+ if (GET_CODE (XVECEXP (unspec , 0, 0)) == LABEL_REF
+ && (GET_CODE (XEXP (XVECEXP (unspec, 0, 0), 0)) == CODE_LABEL))
+ LABEL_NUSES (XEXP (XVECEXP (unspec, 0, 0), 0)) += 2;
+ }
+})
+
+(define_expand "movsi_const_16bit"
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (const:SI (unspec:SI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 0)] UNSPEC_EXTRACT_S16)))]
+ "TARGET_SHMEDIA && flag_pic && reload_completed
+ && GET_CODE (operands[1]) == SYMBOL_REF"
+ "")
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ [(const_int 0)]
+{
+ rtx insn = emit_insn (gen_movsi_const (operands[0], operands[1]));
+
+ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1]));
+
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && ((CONST_INT_P (operands[1])
+ && ! satisfies_constraint_I16 (operands[1]))
+ || GET_CODE (operands[1]) == CONST_DOUBLE)"
+ [(set (subreg:DI (match_dup 0) 0) (match_dup 1))])
+
+(define_expand "movsi"
+ [(set (match_operand:SI 0 "general_movdst_operand" "")
+ (match_operand:SI 1 "general_movsrc_operand" ""))]
+ ""
+{
+ prepare_move_operands (operands, SImode);
+})
+
+(define_expand "ic_invalidate_line"
+ [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r")
+ (match_dup 1)] UNSPEC_ICACHE)
+ (clobber (scratch:SI))])]
+ "TARGET_HARD_SH4 || TARGET_SH5"
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_ic_invalidate_line_media (operands[0]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT)
+ {
+ operands[1] = function_symbol (NULL, "__ic_invalidate", SFUNC_STATIC);
+ operands[1] = force_reg (Pmode, operands[1]);
+ emit_insn (gen_ic_invalidate_line_compact (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SH4A_ARCH || TARGET_SH4_300)
+ {
+ emit_insn (gen_ic_invalidate_line_sh4a (operands[0]));
+ DONE;
+ }
+ operands[0] = force_reg (Pmode, operands[0]);
+ operands[1] = force_reg (Pmode, GEN_INT (trunc_int_for_mode (0xf0000008,
+ Pmode)));
+})
+
+;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing
+;; 0xf0000008, we get the low-order bits *1*00 (binary), which fits
+;; the requirement *1*00 for associative address writes. The alignment of
+;; %0 implies that its least significant bit is cleared,
+;; thus we clear the V bit of a matching entry if there is one.
+(define_insn "ic_invalidate_line_i"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_ICACHE)
+ (clobber (match_scratch:SI 2 "=&r"))]
+ "TARGET_HARD_SH4"
+{
+ return "ocbwb @%0" "\n"
+ " extu.w %0,%2" "\n"
+ " or %1,%2" "\n"
+ " mov.l %0,@%2";
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "cwb")])
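+
+;; A worked example with a hypothetical address: for %0 = 0x0c001234 and
+;; %1 = 0xf0000008 the sequence above computes
+;;	%2 = 0x1234 | 0xf0000008 = 0xf000123c
+;; i.e. an address with bit 3 set and bits 1:0 clear, so the final mov.l
+;; performs the associative address-array write that clears the V bit of a
+;; matching entry, as described in the comment before this pattern.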
+
+(define_insn "ic_invalidate_line_sh4a"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPEC_ICACHE)]
+ "TARGET_SH4A_ARCH || TARGET_SH4_300"
+{
+ return "ocbwb @%0" "\n"
+ " synco" "\n"
+ " icbi @%0";
+}
+ [(set_attr "length" "16") ;; FIXME: Why 16 and not 6? Looks like typo.
+ (set_attr "type" "cwb")])
+
+;; ??? Could make arg 0 an offsettable memory operand to allow saving
+;; an add in the code that calculates the address.
+(define_insn "ic_invalidate_line_media"
+ [(unspec_volatile [(match_operand 0 "any_register_operand" "r")]
+ UNSPEC_ICACHE)]
+ "TARGET_SHMEDIA"
+{
+ return "ocbwb %0,0" "\n"
+ " synco" "\n"
+ " icbi %0,0" "\n"
+ " synci";
+}
+ [(set_attr "length" "16")
+ (set_attr "type" "invalidate_line_media")])
+
+(define_insn "ic_invalidate_line_compact"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "z")
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_ICACHE)
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "initialize_trampoline"
+ [(match_operand:SI 0 "" "")
+ (match_operand:SI 1 "" "")
+ (match_operand:SI 2 "" "")]
+ "TARGET_SHCOMPACT"
+{
+ rtx sfun, tramp;
+
+ tramp = force_reg (Pmode, operands[0]);
+ sfun = force_reg (Pmode, function_symbol (NULL, "__init_trampoline",
+ SFUNC_STATIC));
+ emit_move_insn (gen_rtx_REG (SImode, R2_REG), operands[1]);
+ emit_move_insn (gen_rtx_REG (SImode, R3_REG), operands[2]);
+
+ emit_insn (gen_initialize_trampoline_compact (tramp, sfun));
+ DONE;
+})
+
+(define_insn "initialize_trampoline_compact"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "z")
+ (match_operand:SI 1 "register_operand" "r")
+ (reg:SI R2_REG) (reg:SI R3_REG)]
+ UNSPEC_INIT_TRAMP)
+
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT"
+ "jsr @%1%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "movhi"
+ [(set (match_operand:HI 0 "general_movdst_operand" "")
+ (match_operand:HI 1 "general_movsrc_operand" ""))]
+ ""
+{
+ prepare_move_operands (operands, HImode);
+})
+
+(define_expand "movqi"
+ [(set (match_operand:QI 0 "general_operand" "")
+ (match_operand:QI 1 "general_operand" ""))]
+ ""
+{
+ prepare_move_operands (operands, QImode);
+})
+
+;; Specifying the displacement addressing load / store patterns separately
+;; before the generic movqi / movhi pattern allows controlling the order
+;; in which load / store insns are selected in a more fine-grained way.
+;; FIXME: The non-SH2A and SH2A variants should be combined by adding
+;; "enabled" attribute as it is done in other targets.
+(define_insn "*mov<mode>_store_mem_disp04"
+ [(set (mem:QIHI
+ (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r,r")
+ (match_operand:SI 1 "const_int_operand" "<disp04>,N")))
+ (match_operand:QIHI 2 "arith_reg_operand" "z,r"))]
+ "TARGET_SH1 && sh_legitimate_index_p (<MODE>mode, operands[1], false, true)"
+ "@
+ mov.<bw> %2,@(%O1,%0)
+ mov.<bw> %2,@%0"
+ [(set_attr "type" "store")])
+
+(define_insn "*mov<mode>_store_mem_disp12"
+ [(set (mem:QIHI
+ (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+ (match_operand:SI 1 "const_int_operand" "<disp12>")))
+ (match_operand:QIHI 2 "arith_reg_operand" "r"))]
+ "TARGET_SH2A && sh_legitimate_index_p (<MODE>mode, operands[1], true, true)"
+ "mov.<bw> %2,@(%O1,%0)"
+ [(set_attr "type" "store")
+ (set_attr "length" "4")])
+
+(define_insn "*mov<mode>_load_mem_disp04"
+ [(set (match_operand:QIHI 0 "arith_reg_dest" "=z,r")
+ (mem:QIHI
+ (plus:SI (match_operand:SI 1 "arith_reg_operand" "%r,r")
+ (match_operand:SI 2 "const_int_operand" "<disp04>,N"))))]
+ "TARGET_SH1 && ! TARGET_SH2A
+ && sh_legitimate_index_p (<MODE>mode, operands[2], false, true)"
+ "@
+ mov.<bw> @(%O2,%1),%0
+ mov.<bw> @%1,%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*mov<mode>_load_mem_disp12"
+ [(set (match_operand:QIHI 0 "arith_reg_dest" "=z,r,r")
+ (mem:QIHI
+ (plus:SI
+ (match_operand:SI 1 "arith_reg_operand" "%r,r,r")
+ (match_operand:SI 2 "const_int_operand" "<disp04>,N,<disp12>"))))]
+ "TARGET_SH2A && sh_legitimate_index_p (<MODE>mode, operands[2], true, true)"
+ "@
+ mov.<bw> @(%O2,%1),%0
+ mov.<bw> @%1,%0
+ mov.<bw> @(%O2,%1),%0"
+ [(set_attr "type" "load")
+ (set_attr "length" "2,2,4")])
+
+;; The order of the constraint alternatives is important here.
+;; Q/r has to come first, otherwise PC relative loads might wrongly get
+;; placed into delay slots. Since there is no QImode PC relative load, the
+;; Q constraint and general_movsrc_operand will reject it for QImode.
+;; The Snd alternatives should come before Sdd in order to avoid a preference
+;; of using r0 as the register operand for addressing modes other than
+;; displacement addressing.
+;; The Sdd alternatives allow only r0 as register operand, even though on
+;; SH2A any register could be allowed by switching to a 32 bit insn.
+;; Generally sticking to r0 is preferable, since it generates smaller
+;; code. Obvious r0 reloads can then be eliminated with a peephole on SH2A.
+(define_insn "*mov<mode>"
+ [(set (match_operand:QIHI 0 "general_movdst_operand"
+ "=r,r,r,Snd,r, Sdd,z, r,l")
+ (match_operand:QIHI 1 "general_movsrc_operand"
+ "Q,r,i,r, Snd,z, Sdd,l,r"))]
+ "TARGET_SH1
+ && (arith_reg_operand (operands[0], <MODE>mode)
+ || arith_reg_operand (operands[1], <MODE>mode))"
+ "@
+ mov.<bw> %1,%0
+ mov %1,%0
+ mov %1,%0
+ mov.<bw> %1,%0
+ mov.<bw> %1,%0
+ mov.<bw> %1,%0
+ mov.<bw> %1,%0
+ sts %1,%0
+ lds %1,%0"
+ [(set_attr "type" "pcload,move,movi8,store,load,store,load,prget,prset")
+ (set (attr "length")
+ (cond [(and (match_operand 0 "displacement_mem_operand")
+ (not (match_operand 0 "short_displacement_mem_operand")))
+ (const_int 4)
+ (and (match_operand 1 "displacement_mem_operand")
+ (not (match_operand 1 "short_displacement_mem_operand")))
+ (const_int 4)]
+ (const_int 2)))])
+
+(define_insn "*movqi_media"
+ [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:QI 1 "general_movsrc_operand" "r,I16Css,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (arith_reg_operand (operands[0], QImode)
+ || extend_reg_or_0_operand (operands[1], QImode))"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ ld%M1.ub %m1, %0
+ st%M0.b %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "reload_inqi"
+ [(set (match_operand:SI 2 "" "=&r")
+ (match_operand:QI 1 "inqhi_operand" ""))
+ (set (match_operand:QI 0 "arith_reg_operand" "=r")
+ (truncate:QI (match_dup 3)))]
+ "TARGET_SHMEDIA"
+{
+ rtx inner = XEXP (operands[1], 0);
+ int regno = REGNO (inner);
+
+ regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1;
+ operands[1] = gen_rtx_REG (SImode, regno);
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[2]));
+})
+
+(define_insn "*movhi_media"
+ [(set (match_operand:HI 0 "general_movdst_operand" "=r,r,r,r,m")
+ (match_operand:HI 1 "general_movsrc_operand" "r,I16Css,n,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (arith_reg_operand (operands[0], HImode)
+ || arith_reg_or_0_operand (operands[1], HImode))"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.w %m1, %0
+ st%M0.w %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:HI 0 "register_operand" "")
+ (match_operand:HI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && ! satisfies_constraint_I16 (operands[1])"
+ [(set (subreg:DI (match_dup 0) 0) (match_dup 1))])
+
+(define_expand "reload_inhi"
+ [(set (match_operand:SI 2 "" "=&r")
+ (match_operand:HI 1 "inqhi_operand" ""))
+ (set (match_operand:HI 0 "arith_reg_operand" "=r")
+ (truncate:HI (match_dup 3)))]
+ "TARGET_SHMEDIA"
+{
+ rtx inner = XEXP (operands[1], 0);
+ int regno = REGNO (inner);
+
+ regno += HARD_REGNO_NREGS (regno, GET_MODE (inner)) - 1;
+ operands[1] = gen_rtx_REG (SImode, regno);
+ operands[3] = gen_rtx_REG (DImode, REGNO (operands[2]));
+})
+
+;; x/r can be created by inlining/cse, e.g. for execute/961213-1.c
+;; compiled with -m2 -ml -O3 -funroll-loops
+(define_insn "*movdi_i"
+ [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,m,r,r,r,*!x")
+ (match_operand:DI 1 "general_movsrc_operand" "Q,r,m,r,I08,i,x,r"))]
+ "TARGET_SH1
+ && (arith_reg_operand (operands[0], DImode)
+ || arith_reg_operand (operands[1], DImode))"
+{
+ return output_movedouble (insn, operands, DImode);
+}
+ [(set_attr "length" "4")
+ (set_attr "type" "pcload,move,load,store,move,pcload,move,move")])
+
+;; If the output is a register and the input is memory or a register, we have
+;; to be careful and see which word needs to be loaded first.
+(define_split
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ int regno;
+
+ if ((MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ || (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ FAIL;
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ regno = REGNO (operands[0]);
+ break;
+ case SUBREG:
+ regno = subreg_regno (operands[0]);
+ break;
+ case MEM:
+ regno = -1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (regno == -1
+ || ! refers_to_regno_p (regno, regno + 1, operands[1], 0))
+ {
+ operands[2] = operand_subword (operands[0], 0, 0, DImode);
+ operands[3] = operand_subword (operands[1], 0, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1, 0, DImode);
+ operands[5] = operand_subword (operands[1], 1, 0, DImode);
+ }
+ else
+ {
+ operands[2] = operand_subword (operands[0], 1, 0, DImode);
+ operands[3] = operand_subword (operands[1], 1, 0, DImode);
+ operands[4] = operand_subword (operands[0], 0, 0, DImode);
+ operands[5] = operand_subword (operands[1], 0, 0, DImode);
+ }
+
+ if (operands[2] == 0 || operands[3] == 0
+ || operands[4] == 0 || operands[5] == 0)
+ FAIL;
+})
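+
+;; For example (a sketch), when the destination register pair starts at the
+;; same register that addresses the memory source, loading word 0 first
+;; would clobber the address; the split therefore moves word 1 first and
+;; the word that overlaps the address register last.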
+
+;; The '?'s in the following constraints may not reflect the time taken
+;; to perform the move. They are there to discourage the use of floating-
+;; point registers for storing integer values.
+(define_insn "*movdi_media"
+ [(set (match_operand:DI 0 "general_movdst_operand"
+ "=r,r,r,rl,m,f?,m,f?,r,f?,*b,r,*b")
+ (match_operand:DI 1 "general_movsrc_operand"
+ "r,I16Css,nCpgF,m,rlZ,m,f?,rZ,f?,f?,r,*b,Csy"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], DImode)
+ || sh_register_operand (operands[1], DImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1
+ fld%M1.d %m1, %0
+ fst%M0.d %m0, %1
+ fmov.qd %N1, %0
+ fmov.dq %1, %0
+ fmov.d %1, %0
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,
+ fload_media,fstore_media,fload_media,dfpconv_media,
+ fmove_media,ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,16,4,4,4,4,4,4,4,4,4,*")])
+
+(define_insn "*movdi_media_nofpu"
+  [(set (match_operand:DI 0 "general_movdst_operand" "=r,r,r,rl,m,*b,r,*b")
+ (match_operand:DI 1 "general_movsrc_operand" "r,I16Css,nCpgF,m,rlZ,r,*b,Csy"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], DImode)
+ || sh_register_operand (operands[1], DImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1
+ ptabs %1, %0
+ gettr %1, %0
+ pt %1, %0"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media,
+ ptabs_media,gettr_media,pt_media")
+ (set_attr "length" "4,4,16,4,4,4,4,*")])
+
+(define_insn "*movdi_media_I16"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r")
+ (match_operand:DI 1 "const_int_operand" "I16"))]
+ "TARGET_SHMEDIA && reload_completed"
+ "movi %1, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "length" "4")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx insn;
+
+ if (TARGET_SHMEDIA64)
+ insn = emit_insn (gen_movdi_const (operands[0], operands[1]));
+ else
+ insn = emit_insn (gen_movdi_const_32bit (operands[0], operands[1]));
+
+ set_unique_reg_note (insn, REG_EQUAL, copy_rtx (operands[1]));
+
+ DONE;
+})
+
+(define_expand "movdi_const"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 48)] UNSPEC_EXTRACT_S16)))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 32)] UNSPEC_EXTRACT_U16))))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 16)] UNSPEC_EXTRACT_U16))))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 0)] UNSPEC_EXTRACT_U16))))]
+ "TARGET_SHMEDIA64 && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+{
+ sh_mark_label (operands[1], 4);
+})
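+
+;; Schematically, using a hypothetical literal value for illustration, the
+;; four steps above build a 64-bit constant 16 bits at a time; e.g. loading
+;; 0x0123456789abcdef would come out as
+;;	movi	0x0123, rN
+;;	shori	0x4567, rN
+;;	shori	0x89ab, rN
+;;	shori	0xcdef, rN
+;; where movi loads bits 63..48 sign-extended and each shori shifts the
+;; register left by 16 and ORs in the next 16 bits; rN stands for the
+;; destination register.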
+
+(define_expand "movdi_const_32bit"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 16)] UNSPEC_EXTRACT_S16)))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16))
+ (const:DI (unspec:DI [(match_dup 1)
+ (const_int 0)] UNSPEC_EXTRACT_U16))))]
+ "TARGET_SHMEDIA32 && reload_completed
+ && MOVI_SHORI_BASE_OPERAND_P (operands[1])"
+{
+ sh_mark_label (operands[1], 2);
+})
+
+(define_expand "movdi_const_16bit"
+ [(set (match_operand:DI 0 "arith_reg_operand" "=r")
+ (const:DI (unspec:DI [(match_operand:DI 1 "immediate_operand" "s")
+ (const_int 0)] UNSPEC_EXTRACT_S16)))]
+ "TARGET_SHMEDIA && flag_pic && reload_completed
+ && GET_CODE (operands[1]) == SYMBOL_REF"
+ "")
+
+(define_split
+ [(set (match_operand:DI 0 "ext_dest_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && CONST_INT_P (operands[1])
+ && ! satisfies_constraint_I16 (operands[1])"
+ [(set (match_dup 0) (match_dup 2))
+ (match_dup 1)]
+{
+ unsigned HOST_WIDE_INT val = INTVAL (operands[1]);
+ unsigned HOST_WIDE_INT low = val;
+ unsigned HOST_WIDE_INT high = val;
+ unsigned HOST_WIDE_INT sign;
+ unsigned HOST_WIDE_INT val2 = val ^ (val-1);
+
+ /* Zero-extend the 16 least-significant bits. */
+ low &= 0xffff;
+
+ /* Arithmetic shift right the word by 16 bits. */
+ high >>= 16;
+ if (GET_CODE (operands[0]) == SUBREG
+ && GET_MODE (SUBREG_REG (operands[0])) == SImode)
+ {
+ high &= 0xffff;
+ high ^= 0x8000;
+ high -= 0x8000;
+ }
+ else
+ {
+ sign = 1;
+ sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1);
+ high ^= sign;
+ high -= sign;
+ }
+ do
+ {
+ /* If we can't generate the constant with a two-insn movi / shori
+ sequence, try some other strategies. */
+ if (! CONST_OK_FOR_I16 (high))
+ {
+ /* Try constant load / left shift. We know VAL != 0. */
+ val2 = val ^ (val-1);
+ if (val2 > 0x1ffff)
+ {
+ int trailing_zeroes = exact_log2 ((val2 >> 16) + 1) + 15;
+
+ if (CONST_OK_FOR_I16 (val >> trailing_zeroes)
+ || (! CONST_OK_FOR_I16 (high >> 16)
+ && CONST_OK_FOR_I16 (val >> (trailing_zeroes + 16))))
+ {
+ val2 = (HOST_WIDE_INT) val >> trailing_zeroes;
+ operands[1] = gen_ashldi3_media (operands[0], operands[0],
+ GEN_INT (trailing_zeroes));
+ break;
+ }
+ }
+ /* Try constant load / right shift. */
+ val2 = (val >> 15) + 1;
+ if (val2 == (val2 & -val2))
+ {
+ int shift = 49 - exact_log2 (val2);
+
+ val2 = trunc_int_for_mode (val << shift, DImode);
+ if (CONST_OK_FOR_I16 (val2))
+ {
+ operands[1] = gen_lshrdi3_media (operands[0], operands[0],
+ GEN_INT (shift));
+ break;
+ }
+ }
+ /* Try mperm.w . */
+ val2 = val & 0xffff;
+ if ((val >> 16 & 0xffff) == val2
+ && (val >> 32 & 0xffff) == val2
+ && (val >> 48 & 0xffff) == val2)
+ {
+ val2 = (HOST_WIDE_INT) val >> 48;
+ operands[1] = gen_rtx_REG (V4HImode, true_regnum (operands[0]));
+ operands[1] = gen_mperm_w0 (operands[1], operands[1]);
+ break;
+ }
+ /* Try movi / mshflo.l */
+ val2 = (HOST_WIDE_INT) val >> 32;
+ if (val2 == ((unsigned HOST_WIDE_INT)
+ trunc_int_for_mode (val, SImode)))
+ {
+ operands[1] = gen_mshflo_l_di (operands[0], operands[0],
+ operands[0]);
+ break;
+ }
+ /* Try movi / mshflo.l w/ r63. */
+ val2 = val + ((HOST_WIDE_INT) -1 << 32);
+ if ((HOST_WIDE_INT) val2 < 0 && CONST_OK_FOR_I16 (val2))
+ {
+ operands[1] = gen_mshflo_l_di (operands[0], operands[0],
+ const0_rtx);
+ break;
+ }
+ }
+ val2 = high;
+ operands[1] = gen_shori_media (operands[0], operands[0], GEN_INT (low));
+ }
+ while (0);
+ operands[2] = GEN_INT (val2);
+})
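+
+;; For instance (a sketch), a constant such as 0x12345678 needs none of the
+;; special strategies in the C code above: the upper part 0x1234 already
+;; satisfies I16, so the split simply emits
+;;	movi	0x1234, rN
+;;	shori	0x5678, rN
+;; with rN standing for the destination register.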
+
+(define_split
+ [(set (match_operand:DI 0 "ext_dest_operand" "")
+ (match_operand:DI 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && GET_CODE (operands[1]) == CONST_DOUBLE"
+ [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 0)
+ (ior:DI (ashift:DI (match_dup 0) (const_int 16)) (match_dup 1)))]
+{
+ unsigned HOST_WIDE_INT low = CONST_DOUBLE_LOW (operands[1]);
+ unsigned HOST_WIDE_INT high = CONST_DOUBLE_HIGH (operands[1]);
+ unsigned HOST_WIDE_INT val = low;
+ unsigned HOST_WIDE_INT sign;
+
+ /* Zero-extend the 16 least-significant bits. */
+ val &= 0xffff;
+ operands[1] = GEN_INT (val);
+
+ /* Arithmetic shift right the double-word by 16 bits. */
+ low >>= 16;
+ low |= (high & 0xffff) << (HOST_BITS_PER_WIDE_INT - 16);
+ high >>= 16;
+ sign = 1;
+ sign <<= (HOST_BITS_PER_WIDE_INT - 16 - 1);
+ high ^= sign;
+ high -= sign;
+
+ /* This will only be true if high is a sign-extension of low, i.e.,
+ it must be either 0 or (unsigned)-1, and be zero iff the
+ most-significant bit of low is set. */
+ if (high + (low >> (HOST_BITS_PER_WIDE_INT - 1)) == 0)
+ operands[2] = GEN_INT (low);
+ else
+ operands[2] = immed_double_const (low, high, DImode);
+})
+
+(define_insn "shori_media"
+ [(set (match_operand:DI 0 "ext_dest_operand" "=r,r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0,0")
+ (const_int 16))
+ (match_operand:DI 2 "immediate_operand" "K16Csu,nF")))]
+ "TARGET_SHMEDIA && (reload_completed || arith_reg_dest (operands[0], DImode))"
+ "@
+ shori %u2, %0
+ #"
+ [(set_attr "type" "arith_media,*")])
+
+(define_insn "*shori_media_si"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (const_int 16))
+ (match_operand:SI 2 "immediate_operand" "K16Csu")))]
+ "TARGET_SHMEDIA"
+ "shori %u2, %0")
+
+(define_expand "movdi"
+ [(set (match_operand:DI 0 "general_movdst_operand" "")
+ (match_operand:DI 1 "general_movsrc_operand" ""))]
+ ""
+{
+ prepare_move_operands (operands, DImode);
+})
+
+(define_insn "movdf_media"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], DFmode)
+ || sh_register_operand (operands[1], DFmode))"
+ "@
+ fmov.d %1, %0
+ fmov.qd %N1, %0
+ fmov.dq %1, %0
+ add %1, r63, %0
+ #
+ fld%M1.d %m1, %0
+ fst%M0.d %m0, %1
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "fmove_media,fload_media,dfpconv_media,arith_media,*,
+ fload_media,fstore_media,load_media,store_media")])
+
+(define_insn "movdf_media_nofpu"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "r,F,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], DFmode)
+ || sh_register_operand (operands[1], DFmode))"
+ "@
+ add %1, r63, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,*,load_media,store_media")])
+
+(define_split
+ [(set (match_operand:DF 0 "arith_reg_dest" "")
+ (match_operand:DF 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 3) (match_dup 2))]
+{
+ int endian = WORDS_BIG_ENDIAN ? 1 : 0;
+ long values[2];
+ REAL_VALUE_TYPE value;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]);
+ REAL_VALUE_TO_TARGET_DOUBLE (value, values);
+
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ operands[2] = immed_double_const ((unsigned long) values[endian]
+ | ((HOST_WIDE_INT) values[1 - endian]
+ << 32), 0, DImode);
+ else
+ {
+ gcc_assert (HOST_BITS_PER_WIDE_INT == 32);
+ operands[2] = immed_double_const (values[endian], values[1 - endian],
+ DImode);
+ }
+
+ operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+})
+
+;; FIXME: This should be a define_insn_and_split.
+(define_insn "movdf_k"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))]
+ "TARGET_SH1
+ && (! (TARGET_SH4 || TARGET_SH2A_DOUBLE) || reload_completed
+ /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */
+ || (REG_P (operands[0]) && REGNO (operands[0]) == 3)
+ || (REG_P (operands[1]) && REGNO (operands[1]) == 3))
+ && (arith_reg_operand (operands[0], DFmode)
+ || arith_reg_operand (operands[1], DFmode))"
+{
+ return output_movedouble (insn, operands, DFmode);
+}
+ [(set_attr "length" "4")
+ (set_attr "type" "move,pcload,load,store")])
+
+;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD.
+;; However, the d/F/c/z alternative cannot be split directly; it is converted
+;; with special code in machine_dependent_reorg into a load of the R0_REG and
+;; the d/m/c/X alternative, which is split later into single-precision
+;; instructions. And when not optimizing, no splits are done before fixing
+;; up pcloads, so we need usable length information for that.
+(define_insn "movdf_i4"
+ [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d")
+ (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r"))
+ (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c"))
+ (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && (arith_reg_operand (operands[0], DFmode)
+ || arith_reg_operand (operands[1], DFmode))"
+ {
+ switch (which_alternative)
+ {
+ case 0:
+ if (TARGET_FMOVD)
+ return "fmov %1,%0";
+ else if (REGNO (operands[0]) != REGNO (operands[1]) + 1)
+ return "fmov %R1,%R0" "\n"
+ " fmov %S1,%S0";
+ else
+ return "fmov %S1,%S0" "\n"
+ " fmov %R1,%R0";
+ case 3:
+ case 4:
+ return "fmov.d %1,%0";
+ default:
+ return "#";
+ }
+ }
+ [(set_attr_alternative "length"
+ [(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8))
+ (const_int 4)
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
+ (const_int 4)
+ (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn)
+ ;; We can't use 4-byte push/pop on SHcompact, so we have to
+ ;; increment or decrement r15 explicitly.
+ (if_then_else
+ (match_test "TARGET_SHCOMPACT")
+ (const_int 10) (const_int 8))
+ (if_then_else
+ (match_test "TARGET_SHCOMPACT")
+ (const_int 10) (const_int 8))])
+ (set_attr "type" "fmove,move,pcfload,fload,fstore,pcload,load,store,load,fload")
+ (set_attr "late_fp_use" "*,*,*,*,yes,*,*,*,*,*")
+ (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
+ (const_string "double")
+ (const_string "none")))])
+
+;; Moving DFmode between fp/general registers through memory
+;; (the top of the stack) is faster than moving through fpul even for
+;; little endian. Because the type of an instruction is important for its
+;; scheduling, it is beneficial to split these operations, rather than
+;; emitting them in one single chunk, even if this will expose a stack
+;; use that will prevent scheduling of other stack accesses beyond this
+;; instruction.
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 "=X"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed
+ && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)"
+ [(const_int 0)]
+{
+ rtx insn, tos;
+
+ if (TARGET_SH5 && true_regnum (operands[1]) < 16)
+ {
+ emit_move_insn (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx, -8));
+ tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx);
+ }
+ else
+ tos = gen_tmp_stack_mem (DFmode,
+ gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2]));
+ if (! (TARGET_SH5 && true_regnum (operands[1]) < 16))
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ if (TARGET_SH5 && true_regnum (operands[0]) < 16)
+ tos = gen_tmp_stack_mem (DFmode, stack_pointer_rtx);
+ else
+ tos = gen_tmp_stack_mem (DFmode,
+ gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
+ insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2]));
+ if (TARGET_SH5 && true_regnum (operands[0]) < 16)
+ emit_move_insn (stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx, 8));
+ else
+ add_reg_note (insn, REG_INC, stack_pointer_rtx);
+ DONE;
+})
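+
+;; As a sketch of the effect of this split, an FP -> general register
+;; DFmode move becomes a store of the FP value to the top of the stack
+;; followed by a load of the two words back into the general register pair
+;; (the exact insns depend on TARGET_FMOVD and on whether the SH5 path
+;; adjusts r15 explicitly instead of using pre-dec / post-inc addressing).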
+
+;; local-alloc sometimes allocates scratch registers even when not required,
+;; so we must be prepared to handle these.
+
+;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k.
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && reload_completed
+ && true_regnum (operands[0]) < 16
+ && true_regnum (operands[1]) < 16"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ /* If this was a reg <-> mem operation with base + index reg addressing,
+ we have to handle this in a special way. */
+ rtx mem = operands[0];
+ int store_p = 1;
+ if (! memory_operand (mem, DFmode))
+ {
+ mem = operands[1];
+ store_p = 0;
+ }
+ if (GET_CODE (mem) == SUBREG && SUBREG_BYTE (mem) == 0)
+ mem = SUBREG_REG (mem);
+ if (MEM_P (mem))
+ {
+ rtx addr = XEXP (mem, 0);
+ if (GET_CODE (addr) == PLUS
+ && REG_P (XEXP (addr, 0))
+ && REG_P (XEXP (addr, 1)))
+ {
+ int offset;
+ rtx reg0 = gen_rtx_REG (Pmode, 0);
+ rtx regop = operands[store_p], word0 ,word1;
+
+ if (GET_CODE (regop) == SUBREG)
+ alter_subreg (&regop, true);
+ if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1)))
+ offset = 2;
+ else
+ offset = 4;
+ mem = copy_rtx (mem);
+ PUT_MODE (mem, SImode);
+ word0 = gen_rtx_SUBREG (SImode, regop, 0);
+ alter_subreg (&word0, true);
+ word1 = gen_rtx_SUBREG (SImode, regop, 4);
+ alter_subreg (&word1, true);
+ if (store_p || ! refers_to_regno_p (REGNO (word0),
+ REGNO (word0) + 1, addr, 0))
+ {
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word0)
+ : gen_movsi_ie (word0, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
+ mem = copy_rtx (mem);
+ emit_insn (store_p
+ ? gen_movsi_ie (mem, word1)
+ : gen_movsi_ie (word1, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset)));
+ }
+ else
+ {
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
+ emit_insn (gen_movsi_ie (word1, mem));
+ emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset)));
+ mem = copy_rtx (mem);
+ emit_insn (gen_movsi_ie (word0, mem));
+ }
+ DONE;
+ }
+ }
+})
+
+;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads.
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (reg:SI R0_REG))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && reload_completed"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])]
+ "")
+
+(define_expand "reload_indf__frn"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=a")
+ (match_operand:DF 1 "immediate_operand" "FQ"))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ "TARGET_SH1"
+ "")
+
+(define_expand "reload_outdf__RnFRm"
+ [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f")
+ (match_operand:DF 1 "register_operand" "af,r"))
+ (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])]
+ "TARGET_SH1"
+ "")
+
+;; Simplify no-op moves.
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_SH2E && reload_completed
+ && true_regnum (operands[0]) == true_regnum (operands[1])"
+ [(set (match_dup 0) (match_dup 0))]
+ "")
+
+;; fmovd substitute post-reload splits
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))"
+ [(const_int 0)]
+{
+ int dst = true_regnum (operands[0]), src = true_regnum (operands[1]);
+ emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst),
+ gen_rtx_REG (SFmode, src), operands[2]));
+ emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode, dst + 1),
+ gen_rtx_REG (SFmode, src + 1), operands[2]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (mem:DF (match_operand:SI 1 "register_operand" "")))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))
+ && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))"
+ [(const_int 0)]
+{
+ int regno = true_regnum (operands[0]);
+ rtx insn;
+ rtx mem = SET_SRC (XVECEXP (PATTERN (curr_insn), 0, 0));
+ rtx mem2
+ = change_address (mem, SFmode, gen_rtx_POST_INC (Pmode, operands[1]));
+ insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode,
+ regno + SH_REG_MSW_OFFSET),
+ mem2, operands[2]));
+ add_reg_note (insn, REG_INC, operands[1]);
+ insn = emit_insn (gen_movsf_ie (gen_rtx_REG (SFmode,
+ regno + SH_REG_LSW_OFFSET),
+ change_address (mem, SFmode, NULL_RTX),
+ operands[2]));
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (match_operand:DF 1 "memory_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))"
+ [(const_int 0)]
+{
+ int regno = true_regnum (operands[0]);
+ rtx addr, insn;
+ rtx mem2 = change_address (operands[1], SFmode, NULL_RTX);
+ rtx reg0 = gen_rtx_REG (SFmode, regno + SH_REG_MSW_OFFSET);
+ rtx reg1 = gen_rtx_REG (SFmode, regno + SH_REG_LSW_OFFSET);
+
+ operands[1] = copy_rtx (mem2);
+ addr = XEXP (mem2, 0);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* This is complicated. If the register is an arithmetic register
+ we can just fall through to the REG+DISP case below. Otherwise
+ we have to use a combination of POST_INC and REG addressing... */
+ if (! arith_reg_operand (operands[1], SFmode))
+ {
+ XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr);
+ insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+
+ emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+
+ /* If we have modified the stack pointer, the value that we have
+ read with post-increment might be modified by an interrupt,
+ so write it back. */
+ if (REGNO (XEXP (addr, 0)) == STACK_POINTER_REGNUM)
+ emit_insn (gen_push_e (reg0));
+ else
+ emit_insn (gen_addsi3 (XEXP (operands[1], 0), XEXP (operands[1], 0),
+ GEN_INT (-4)));
+ break;
+ }
+ /* Fall through. */
+
+ case PLUS:
+ emit_insn (gen_movsf_ie (reg0, operands[1], operands[2]));
+ operands[1] = copy_rtx (operands[1]);
+ XEXP (operands[1], 0) = plus_constant (Pmode, addr, 4);
+ emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+ break;
+
+ case POST_INC:
+ insn = emit_insn (gen_movsf_ie (reg0, operands[1], operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+
+ insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+ break;
+
+ default:
+ debug_rtx (addr);
+ gcc_unreachable ();
+ }
+
+ DONE;
+})
+
+(define_split
+ [(set (match_operand:DF 0 "memory_operand" "")
+ (match_operand:DF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (match_scratch:SI 3 ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && ! TARGET_FMOVD && reload_completed
+ && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))"
+ [(const_int 0)]
+{
+ int regno = true_regnum (operands[1]);
+ rtx insn, addr;
+ rtx reg0 = gen_rtx_REG (SFmode, regno + SH_REG_MSW_OFFSET);
+ rtx reg1 = gen_rtx_REG (SFmode, regno + SH_REG_LSW_OFFSET);
+
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], SFmode);
+ addr = XEXP (operands[0], 0);
+
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ /* This is complicated. If the register is an arithmetic register
+ we can just fall through to the REG+DISP case below. Otherwise
+ we have to use a combination of REG and PRE_DEC addressing... */
+ if (! arith_reg_operand (operands[0], SFmode))
+ {
+ emit_insn (gen_addsi3 (addr, addr, GEN_INT (4)));
+ emit_insn (gen_movsf_ie (operands[0], reg1, operands[2]));
+
+ operands[0] = copy_rtx (operands[0]);
+ XEXP (operands[0], 0) = addr = gen_rtx_PRE_DEC (SImode, addr);
+
+ insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+ break;
+ }
+ /* Fall through. */
+
+ case PLUS:
+ /* Since REG+DISP addressing has already been decided upon by gcc
+ we can rely upon it having chosen an arithmetic register as the
+ register component of the address. Just emit the lower numbered
+ register first, to the lower address, then the higher numbered
+ register to the higher address. */
+ emit_insn (gen_movsf_ie (operands[0], reg0, operands[2]));
+
+ operands[0] = copy_rtx (operands[0]);
+ XEXP (operands[0], 0) = plus_constant (Pmode, addr, 4);
+
+ emit_insn (gen_movsf_ie (operands[0], reg1, operands[2]));
+ break;
+
+ case PRE_DEC:
+ /* This is easy. Output the word to go to the higher address
+ first (ie the word in the higher numbered register) then the
+ word to go to the lower address. */
+
+ insn = emit_insn (gen_movsf_ie (operands[0], reg1, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+
+ insn = emit_insn (gen_movsf_ie (operands[0], reg0, operands[2]));
+ add_reg_note (insn, REG_INC, XEXP (addr, 0));
+ break;
+
+ default:
+ /* FAIL; */
+ debug_rtx (addr);
+ gcc_unreachable ();
+ }
+
+ DONE;
+})
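+
+;; A sketch of the store sequence produced by the REG case above, assuming
+;; the base register is some general register Rn (names are illustrative,
+;; not taken from actual compiler output).  reg0 is stored at the lower
+;; address and reg1 at the higher address:
+;;	add	#4,Rn		! point Rn at the upper word
+;;	fmov.s	reg1,@Rn	! store the word for the higher address
+;;	fmov.s	reg0,@-Rn	! store the lower word; Rn ends up restored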
+
+;; If the output is a register and the input is memory or a register, we have
+;; to be careful and see which word needs to be loaded first.
+(define_split
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && reload_completed"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+{
+ int regno;
+
+ if ((MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == PRE_DEC)
+ || (MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == POST_INC))
+ FAIL;
+
+ switch (GET_CODE (operands[0]))
+ {
+ case REG:
+ regno = REGNO (operands[0]);
+ break;
+ case SUBREG:
+ regno = subreg_regno (operands[0]);
+ break;
+ case MEM:
+ regno = -1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (regno == -1
+ || ! refers_to_regno_p (regno, regno + 1, operands[1], 0))
+ {
+ operands[2] = operand_subword (operands[0], 0, 0, DFmode);
+ operands[3] = operand_subword (operands[1], 0, 0, DFmode);
+ operands[4] = operand_subword (operands[0], 1, 0, DFmode);
+ operands[5] = operand_subword (operands[1], 1, 0, DFmode);
+ }
+ else
+ {
+ operands[2] = operand_subword (operands[0], 1, 0, DFmode);
+ operands[3] = operand_subword (operands[1], 1, 0, DFmode);
+ operands[4] = operand_subword (operands[0], 0, 0, DFmode);
+ operands[5] = operand_subword (operands[1], 0, 0, DFmode);
+ }
+
+ if (operands[2] == 0 || operands[3] == 0
+ || operands[4] == 0 || operands[5] == 0)
+ FAIL;
+})
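+
+;; Illustration of the word-ordering check above (a sketch; register numbers
+;; are only examples): if the destination is the register pair r1/r2 and the
+;; source is the memory word pair at @r1, the first destination register is
+;; also the address base, so the split emits the second word first:
+;;	mov.l	@(4,r1),r2	! load the word at offset 4 before r1 is clobbered
+;;	mov.l	@r1,r1		! now the base register may be overwritten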
+
+(define_expand "movdf"
+ [(set (match_operand:DF 0 "general_movdst_operand" "")
+ (match_operand:DF 1 "general_movsrc_operand" ""))]
+ ""
+{
+ prepare_move_operands (operands, DFmode);
+ if (TARGET_SHMEDIA)
+ {
+ if (TARGET_SHMEDIA_FPU)
+ emit_insn (gen_movdf_media (operands[0], operands[1]));
+ else
+ emit_insn (gen_movdf_media_nofpu (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+;; This is incompatible with the way gcc uses subregs.
+;;(define_insn "movv2sf_i"
+;; [(set (match_operand:V2SF 0 "nonimmediate_operand" "=f,f,m")
+;; (match_operand:V2SF 1 "nonimmediate_operand" "f,m,f"))]
+;; "TARGET_SHMEDIA_FPU
+;; && (fp_arith_reg_operand (operands[0], V2SFmode)
+;; || fp_arith_reg_operand (operands[1], V2SFmode))"
+;; "@
+;; #
+;; fld%M1.p %m1, %0
+;; fst%M0.p %m0, %1"
+;; [(set_attr "type" "*,fload_media,fstore_media")])
+(define_insn_and_split "movv2sf_i"
+ [(set (match_operand:V2SF 0 "general_movdst_operand" "=f,rf,r,m,mf")
+ (match_operand:V2SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "TARGET_SHMEDIA_FPU && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = simplify_gen_subreg (DFmode, operands[0], V2SFmode, 0);
+ operands[1] = simplify_gen_subreg (DFmode, operands[1], V2SFmode, 0);
+})
+
+(define_expand "movv2sf"
+ [(set (match_operand:V2SF 0 "general_movdst_operand" "")
+ (match_operand:V2SF 1 "nonimmediate_operand" ""))]
+ "TARGET_SHMEDIA_FPU"
+{
+ prepare_move_operands (operands, V2SFmode);
+})
+
+(define_expand "addv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+{
+ sh_expand_binop_v2sf (PLUS, operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "subv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+{
+ sh_expand_binop_v2sf (MINUS, operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "mulv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+{
+ sh_expand_binop_v2sf (MULT, operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "divv2sf3"
+ [(match_operand:V2SF 0 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 1 "fp_arith_reg_operand" "")
+ (match_operand:V2SF 2 "fp_arith_reg_operand" "")]
+ "TARGET_SHMEDIA_FPU"
+{
+ sh_expand_binop_v2sf (DIV, operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_insn_and_split "*movv4sf_i"
+ [(set (match_operand:V4SF 0 "general_movdst_operand" "=f,rf,r,m,mf")
+ (match_operand:V4SF 1 "general_operand" "fm,rfm?,F?,f,rfZ?"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ for (int i = 0; i < 4/2; i++)
+ {
+ rtx x, y;
+
+ if (MEM_P (operands[0]))
+ x = adjust_address (operands[0], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ x = simplify_gen_subreg (V2SFmode, operands[0], V4SFmode, i * 8);
+
+ if (MEM_P (operands[1]))
+ y = adjust_address (operands[1], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ y = simplify_gen_subreg (V2SFmode, operands[1], V4SFmode, i * 8);
+
+ emit_insn (gen_movv2sf_i (x, y));
+ }
+
+ DONE;
+}
+ [(set_attr "length" "8")])
+
+(define_expand "movv4sf"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (match_operand:V4SF 1 "general_operand" ""))]
+ "TARGET_SHMEDIA_FPU"
+{
+ prepare_move_operands (operands, V4SFmode);
+})
+
+(define_insn_and_split "*movv16sf_i"
+ [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m")
+ (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ for (int i = 0; i < 16/2; i++)
+ {
+ rtx x, y;
+
+ if (MEM_P (operands[0]))
+ x = adjust_address (operands[0], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ {
+ x = gen_rtx_SUBREG (V2SFmode, operands[0], i * 8);
+ alter_subreg (&x, true);
+ }
+
+ if (MEM_P (operands[1]))
+ y = adjust_address (operands[1], V2SFmode,
+ i * GET_MODE_SIZE (V2SFmode));
+ else
+ {
+ y = gen_rtx_SUBREG (V2SFmode, operands[1], i * 8);
+ alter_subreg (&y, true);
+ }
+
+ emit_insn (gen_movv2sf_i (x, y));
+ }
+
+ DONE;
+}
+ [(set_attr "length" "32")])
+
+(define_expand "movv16sf"
+ [(set (match_operand:V16SF 0 "nonimmediate_operand" "=f,f,m")
+ (match_operand:V16SF 1 "nonimmediate_operand" "f,m,f"))]
+ "TARGET_SHMEDIA_FPU"
+{
+ prepare_move_operands (operands, V16SFmode);
+})
+
+(define_insn "movsf_media"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=f,f,r,r,r,f,m,r,m")
+ (match_operand:SF 1 "general_movsrc_operand" "f,rZ,f,r,F,m,f,m,rZ"))]
+ "TARGET_SHMEDIA_FPU
+ && (register_operand (operands[0], SFmode)
+ || sh_register_operand (operands[1], SFmode))"
+ "@
+ fmov.s %1, %0
+ fmov.ls %N1, %0
+ fmov.sl %1, %0
+ add.l %1, r63, %0
+ #
+ fld%M1.s %m1, %0
+ fst%M0.s %m0, %1
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1"
+ [(set_attr "type" "fmove_media,fload_media,fpconv_media,arith_media,*,fload_media,fstore_media,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_insn "movsf_media_nofpu"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,m")
+ (match_operand:SF 1 "general_movsrc_operand" "r,F,m,rZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], SFmode)
+ || sh_register_operand (operands[1], SFmode))"
+ "@
+ add.l %1, r63, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1"
+ [(set_attr "type" "arith_media,*,load_media,store_media")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_split
+ [(set (match_operand:SF 0 "arith_reg_dest" "")
+ (match_operand:SF 1 "immediate_operand" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && ! FP_REGISTER_P (true_regnum (operands[0]))"
+ [(set (match_dup 3) (match_dup 2))]
+{
+ long values;
+ REAL_VALUE_TYPE value;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]);
+ REAL_VALUE_TO_TARGET_SINGLE (value, values);
+ operands[2] = GEN_INT (values);
+
+ operands[3] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+})
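+
+;; For example (a sketch): under the split above, loading the SF constant
+;; 1.0 into a general register becomes a DImode move of its IEEE-754
+;; single-precision bit pattern, i.e. of the integer 0x3f800000.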
+
+(define_insn "movsf_i"
+ [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
+ (match_operand:SF 1 "general_movsrc_operand" "r,G,FQ,mr,r,r,l"))]
+ "TARGET_SH1
+ && (! TARGET_SH2E
+ /* ??? We provide some insn so that direct_{load,store}[SFmode] get set */
+ || (REG_P (operands[0]) && REGNO (operands[0]) == 3)
+ || (REG_P (operands[1]) && REGNO (operands[1]) == 3))
+ && (arith_reg_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode))"
+ "@
+ mov %1,%0
+ mov #0,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ lds %1,%0
+ sts %1,%0"
+ [(set_attr "type" "move,move,pcload,load,store,move,move")])
+
+;; We may not split the ry/yr/XX alternatives to movsi_ie, since
+;; update_flow_info would not know where to put REG_EQUAL notes
+;; when the destination changes mode.
+(define_insn "movsf_ie"
+ [(set (match_operand:SF 0 "general_movdst_operand"
+ "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,<,y,y")
+ (match_operand:SF 1 "general_movsrc_operand"
+ "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y,>,y"))
+ (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c"))
+ (clobber (match_scratch:SI 3 "=X,X,Bsc,Bsc,&z,X,X,X,X,X,X,X,X,y,X,X,X,X,X"))]
+ "TARGET_SH2E
+ && (arith_reg_operand (operands[0], SFmode) || fpul_operand (operands[0], SFmode)
+ || arith_reg_operand (operands[1], SFmode) || fpul_operand (operands[1], SFmode)
+ || arith_reg_operand (operands[3], SImode))"
+ "@
+ fmov %1,%0
+ mov %1,%0
+ fldi0 %0
+ fldi1 %0
+ #
+ fmov.s %1,%0
+ fmov.s %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ mov.l %1,%0
+ fsts fpul,%0
+ flds %1,fpul
+ lds.l %1,%0
+ #
+ sts %1,%0
+ lds %1,%0
+ sts.l %1,%0
+ lds.l %1,%0
+ ! move optimized away"
+ [(set_attr "type" "fmove,move,fmove,fmove,pcfload,fload,fstore,pcload,load,
+ store,fmove,fmove,load,*,fpul_gp,gp_fpul,fstore,load,nil")
+ (set_attr "late_fp_use" "*,*,*,*,*,*,yes,*,*,*,*,*,*,*,yes,*,yes,*,*")
+ (set_attr_alternative "length"
+ [(const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (if_then_else
+ (match_test "TARGET_SH2A")
+ (const_int 4) (const_int 2))
+ (if_then_else
+ (match_test "TARGET_SH2A")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (if_then_else
+ (match_test "TARGET_SH2A")
+ (const_int 4) (const_int 2))
+ (if_then_else
+ (match_test "TARGET_SH2A")
+ (const_int 4) (const_int 2))
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 4)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 2)
+ (const_int 0)])
+ (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
+ (const_string "single")
+ (const_string "single")))])
+
+(define_split
+ [(set (match_operand:SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" ""))
+ (use (match_operand:PSI 2 "fpscr_operand" ""))
+ (clobber (reg:SI FPUL_REG))]
+ "TARGET_SH1"
+ [(parallel [(set (reg:SF FPUL_REG) (match_dup 1))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])
+ (parallel [(set (match_dup 0) (reg:SF FPUL_REG))
+ (use (match_dup 2))
+ (clobber (scratch:SI))])]
+ "")
+
+(define_expand "movsf"
+ [(set (match_operand:SF 0 "general_movdst_operand" "")
+ (match_operand:SF 1 "general_movsrc_operand" ""))]
+ ""
+{
+ prepare_move_operands (operands, SFmode);
+ if (TARGET_SHMEDIA)
+ {
+ if (TARGET_SHMEDIA_FPU)
+ emit_insn (gen_movsf_media (operands[0], operands[1]));
+ else
+ emit_insn (gen_movsf_media_nofpu (operands[0], operands[1]));
+ DONE;
+ }
+ if (TARGET_SH2E)
+ {
+ emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "mov_nop"
+ [(set (match_operand 0 "any_register_operand" "") (match_dup 0))]
+ "TARGET_SH2E"
+ ""
+ [(set_attr "length" "0")
+ (set_attr "type" "nil")])
+
+(define_expand "reload_insf__frn"
+ [(parallel [(set (match_operand:SF 0 "register_operand" "=a")
+ (match_operand:SF 1 "immediate_operand" "FQ"))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ "TARGET_SH1"
+ "")
+
+(define_expand "reload_insi__i_fpul"
+ [(parallel [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (match_operand:SI 1 "immediate_operand" "i"))
+ (clobber (match_operand:SI 2 "register_operand" "=&z"))])]
+ "TARGET_SH1"
+ "")
+
+(define_expand "ptabs"
+ [(set (match_operand 0 "" "=b") (match_operand 1 "" "r"))]
+ "TARGET_SHMEDIA"
+{
+ if (!TARGET_PT_FIXED)
+ {
+ rtx eq = operands[1];
+
+ /* ??? For canonical RTL we really should remove any CONST from EQ
+ before wrapping it in the AND, and finally wrap the EQ into a
+     const if it is constant.  However, for reload we must expose the
+ input register or symbolic constant, and we can't have
+ different insn structures outside of the operands for different
+ alternatives of the same pattern. */
+ eq = gen_rtx_EQ (SImode, gen_rtx_AND (Pmode, eq, GEN_INT (3)),
+ GEN_INT (3));
+ operands[1]
+ = (gen_rtx_IF_THEN_ELSE
+ (PDImode,
+ eq,
+ gen_rtx_MEM (PDImode, operands[1]),
+ gen_rtx_fmt_e (TARGET_SHMEDIA32 ? SIGN_EXTEND : TRUNCATE,
+ PDImode, operands[1])));
+ }
+})
+
+;; Expanded by the ptabs expander.
+(define_insn "*extendsipdi_media"
+ [(set (match_operand:PDI 0 "target_reg_operand" "=b,b")
+ (if_then_else:PDI (eq (and:SI (match_operand:SI 1 "target_operand"
+ "r,Csy")
+ (const_int 3))
+ (const_int 3))
+ (mem:PDI (match_dup 1))
+ (sign_extend:PDI (match_dup 1))))]
+ "TARGET_SHMEDIA && !TARGET_PT_FIXED"
+ "@
+ ptabs %1, %0
+ pt %1, %0"
+ [(set_attr "type" "ptabs_media,pt_media")
+ (set_attr "length" "4,*")])
+
+(define_insn "*truncdipdi_media"
+ [(set (match_operand:PDI 0 "target_reg_operand" "=b,b")
+ (if_then_else:PDI (eq (and:DI (match_operand:DI 1 "target_operand"
+ "r,Csy")
+ (const_int 3))
+ (const_int 3))
+ (mem:PDI (match_dup 1))
+ (truncate:PDI (match_dup 1))))]
+ "TARGET_SHMEDIA && !TARGET_PT_FIXED"
+ "@
+ ptabs %1, %0
+ pt %1, %0"
+ [(set_attr "type" "ptabs_media,pt_media")
+ (set_attr "length" "4,*")])
+
+(define_insn "*movsi_y"
+ [(set (match_operand:SI 0 "register_operand" "=y,y")
+ (match_operand:SI 1 "immediate_operand" "Qi,I08"))
+ (clobber (match_scratch:SI 2 "=&z,r"))]
+ "TARGET_SH2E
+ && (reload_in_progress || reload_completed)"
+ "#"
+ [(set_attr "length" "4")
+ (set_attr "type" "pcload,move")])
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))
+ (clobber (match_operand:SI 2 "register_operand" ""))]
+ "TARGET_SH1"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (match_dup 2))]
+ "")
+
+;; ------------------------------------------------------------------------
+;; Define the real conditional branch instructions.
+;; ------------------------------------------------------------------------
+
+(define_expand "branch_true"
+ [(set (pc) (if_then_else (ne (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0))
+ (pc)))]
+ "TARGET_SH1")
+
+(define_expand "branch_false"
+ [(set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_operand 0))
+ (pc)))]
+ "TARGET_SH1")
+
+(define_insn_and_split "*cbranch_t"
+ [(set (pc) (if_then_else (match_operand 1 "cbranch_treg_value")
+ (label_ref (match_operand 0))
+ (pc)))]
+ "TARGET_SH1"
+{
+ return output_branch (sh_eval_treg_value (operands[1]), insn, operands);
+}
+ "&& 1"
+ [(const_int 0)]
+{
+ /* Try to canonicalize the branch condition if it is not one of:
+ (ne (reg:SI T_REG) (const_int 0))
+ (eq (reg:SI T_REG) (const_int 0))
+
+ Instead of splitting out a new insn, we modify the current insn's
+ operands as needed. This preserves things such as REG_DEAD notes. */
+
+ if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
+ && REG_P (XEXP (operands[1], 0)) && REGNO (XEXP (operands[1], 0)) == T_REG
+ && XEXP (operands[1], 1) == const0_rtx)
+ DONE;
+
+ int branch_cond = sh_eval_treg_value (operands[1]);
+ rtx new_cond_rtx = NULL_RTX;
+
+ if (branch_cond == 0)
+ new_cond_rtx = gen_rtx_EQ (VOIDmode, get_t_reg_rtx (), const0_rtx);
+ else if (branch_cond == 1)
+ new_cond_rtx = gen_rtx_NE (VOIDmode, get_t_reg_rtx (), const0_rtx);
+
+ if (new_cond_rtx != NULL_RTX)
+ validate_change (curr_insn, &XEXP (XEXP (PATTERN (curr_insn), 1), 0),
+ new_cond_rtx, false);
+ DONE;
+}
+ [(set_attr "type" "cbranch")])
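+
+;; Illustration of the canonicalization above (a sketch): T_REG only ever
+;; holds 0 or 1, so a condition such as (eq (reg:SI T_REG) (const_int 1))
+;; makes sh_eval_treg_value return 1 and is rewritten in place as
+;; (ne (reg:SI T_REG) (const_int 0)), one of the two canonical forms.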
+
+;; Patterns to prevent reorg from re-combining a condbranch with a branch
+;; whose destination is too far away.
+;; The const_int_operand is distinct for each branch target; it avoids
+;; unwanted matches with redundant_insn.
+(define_insn "block_branch_redirect"
+ [(set (pc) (unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BBR))]
+ "TARGET_SH1"
+ ""
+ [(set_attr "length" "0")])
+
+;; This one has the additional purpose of recording a possible scratch
+;; register for the following branch.
+;; ??? Unfortunately, just setting the scratch register is not good enough,
+;; because the insn then might be deemed dead and deleted. And we can't
+;; make the use in the jump insn explicit because that would disable
+;; delay slot scheduling from the target.
+(define_insn "indirect_jump_scratch"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(match_operand 1 "const_int_operand" "")] UNSPEC_BBR))
+ (set (pc) (unspec [(const_int 0)] UNSPEC_BBR))]
+ "TARGET_SH1"
+ ""
+ [(set_attr "length" "0")])
+
+;; This one is used to preempt an insn from beyond the bra / braf / jmp
+;; being pulled into the delay slot of a condbranch that has been made to
+;; jump around the unconditional jump because it was out of range.
+(define_insn "stuff_delay_slot"
+ [(set (pc)
+ (unspec [(match_operand:SI 0 "const_int_operand" "") (pc)
+ (match_operand:SI 1 "const_int_operand" "")] UNSPEC_BBR))]
+ "TARGET_SH1"
+ ""
+ [(set_attr "length" "0")
+ (set_attr "cond_delay_slot" "yes")])
+
+;; Conditional branch insns
+
+(define_expand "cbranchint4_media"
+ [(set (pc)
+ (if_then_else (match_operator 0 "shmedia_cbranch_comparison_operator"
+ [(match_operand 1 "" "")
+ (match_operand 2 "" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "TARGET_SHMEDIA"
+{
+ enum machine_mode mode = GET_MODE (operands[1]);
+ if (mode == VOIDmode)
+ mode = GET_MODE (operands[2]);
+ if (GET_CODE (operands[0]) == EQ || GET_CODE (operands[0]) == NE)
+ {
+ operands[1] = force_reg (mode, operands[1]);
+ if (CONSTANT_P (operands[2])
+ && (! satisfies_constraint_I06 (operands[2])))
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ else
+ {
+ if (operands[1] != const0_rtx)
+ operands[1] = force_reg (mode, operands[1]);
+ if (operands[2] != const0_rtx)
+ operands[2] = force_reg (mode, operands[2]);
+ }
+ switch (GET_CODE (operands[0]))
+ {
+ case LEU:
+ case LE:
+ case LTU:
+ case LT:
+ operands[0] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[0])),
+ VOIDmode, operands[2], operands[1]);
+ operands[1] = XEXP (operands[0], 0);
+ operands[2] = XEXP (operands[0], 1);
+ break;
+ default:
+ operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]),
+ VOIDmode, operands[1], operands[2]);
+ break;
+ }
+ operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]);
+})
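+
+;; Illustration of the operand swap above (a sketch; register names are only
+;; examples): SHmedia provides "equal" and "greater" style compare-and-branch
+;; instructions, so a requested (lt r4, r5) branch is turned into the
+;; equivalent (gt r5, r4) and ends up as something like "bgt r5, r4, Ltarget"
+;; rather than a nonexistent less-than branch.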
+
+(define_expand "cbranchfp4_media"
+ [(set (pc)
+ (if_then_else (match_operator 0 "sh_float_comparison_operator"
+ [(match_operand 1 "" "")
+ (match_operand 2 "" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "TARGET_SHMEDIA"
+{
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx cmp;
+ if (GET_CODE (operands[0]) == NE)
+ cmp = gen_rtx_EQ (SImode, operands[1], operands[2]);
+ else
+ cmp = gen_rtx_fmt_ee (GET_CODE (operands[0]), SImode,
+ operands[1], operands[2]);
+
+ emit_insn (gen_cstore4_media (tmp, cmp, operands[1], operands[2]));
+
+ if (GET_CODE (cmp) == GET_CODE (operands[0]))
+ operands[0] = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ else
+ operands[0] = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+ operands[1] = tmp;
+ operands[2] = const0_rtx;
+ operands[3] = gen_rtx_LABEL_REF (Pmode, operands[3]);
+})
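+
+;; Sketch of the scheme above: for a floating-point a != b branch the
+;; expander computes tmp = (a == b) with a cstore and branches when tmp is
+;; zero; for the directly supported comparisons it computes the comparison
+;; itself into tmp and branches when tmp is nonzero.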
+
+(define_insn "*beq_media_i"
+ [(set (pc)
+ (if_then_else (match_operator 3 "equality_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_operand" "r,r")
+ (match_operand:DI 2 "arith_operand" "r,I06")])
+ (match_operand 0 "target_operand" "b,b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "@
+ b%o3%' %1, %2, %0%>
+ b%o3i%' %1, %2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*beq_media_i32"
+ [(set (pc)
+ (if_then_else (match_operator 3 "equality_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_operand" "r,r")
+ (match_operand:SI 2 "arith_operand" "r,I06")])
+ (match_operand 0 "target_operand" "b,b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "@
+ b%o3%' %1, %2, %0%>
+ b%o3i%' %1, %2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*bgt_media_i"
+ [(set (pc)
+ (if_then_else (match_operator 3 "greater_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N1, %N2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*bgt_media_i32"
+ [(set (pc)
+ (if_then_else (match_operator 3 "greater_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N1, %N2, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+;; These are only needed to make invert_jump() happy - otherwise, jump
+;; optimization will be silently disabled.
+(define_insn "*blt_media_i"
+ [(set (pc)
+ (if_then_else (match_operator 3 "less_comparison_operator"
+ [(match_operand:DI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N2, %N1, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+(define_insn "*blt_media_i32"
+ [(set (pc)
+ (if_then_else (match_operator 3 "less_comparison_operator"
+ [(match_operand:SI 1 "arith_reg_or_0_operand" "rN")
+ (match_operand:SI 2 "arith_reg_or_0_operand" "rN")])
+ (match_operand 0 "target_operand" "b")
+ (pc)))]
+ "TARGET_SHMEDIA"
+ "b%o3%' %N2, %N1, %0%>"
+ [(set_attr "type" "cbranch_media")])
+
+;; Combiner splitter for a test-and-branch on a single bit in a register.
+;; This is endian-dependent because the non-paradoxical subreg looks
+;; different on big endian.
+(define_split
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(subreg:SI
+ (zero_extract:DI
+ (subreg:DI (match_operand:SI 1 "extend_reg_operand" "") 0)
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "")) 0)
+ (const_int 0)])
+ (match_operand 0 "target_operand" "")
+ (pc)))
+ (clobber (match_operand:SI 4 "arith_reg_dest" ""))]
+ "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN"
+ [(set (match_dup 4) (ashift:SI (match_dup 1) (match_dup 5)))
+ (set (pc) (if_then_else (match_dup 6) (match_dup 0) (pc)))]
+{
+ operands[5] = GEN_INT (31 - INTVAL (operands[2]));
+ operands[6] = (GET_CODE (operands[3]) == EQ
+ ? gen_rtx_GE (VOIDmode, operands[4], const0_rtx)
+ : gen_rtx_GT (VOIDmode, const0_rtx, operands[4]));
+})
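+
+;; Worked example for the splitter above: to test bit 3 of a register,
+;; operands[5] becomes 31 - 3 = 28, so the value is shifted left by 28 and
+;; bit 3 lands in the sign bit; the branch then checks the sign (value >= 0
+;; means the bit was clear, the EQ case; value < 0 means it was set, the NE
+;; case).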
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(parallel [(set (pc)
+ (if_then_else (ne:SI (match_operand:SI 0 "" "")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (const_int -1)))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH2"
+{
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+ emit_jump_insn (gen_doloop_end_split (operands[0], operands[1], operands[0]));
+ DONE;
+})
+
+(define_insn_and_split "doloop_end_split"
+ [(set (pc)
+ (if_then_else (ne:SI (match_operand:SI 2 "arith_reg_dest" "0")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (match_dup 2) (const_int -1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH2"
+ "#"
+ ""
+ [(parallel [(set (reg:SI T_REG)
+ (eq:SI (match_dup 2) (const_int 1)))
+ (set (match_dup 0) (plus:SI (match_dup 2) (const_int -1)))])
+ (set (pc) (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (label_ref (match_dup 1))
+ (pc)))]
+ ""
+ [(set_attr "type" "cbranch")])
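+
+;; A sketch of the usual result (assuming the decrement-and-test pattern
+;; elsewhere in this file matches the parallel above): on SH2 and later the
+;; loop ending typically assembles to
+;;	dt	Rn		! Rn = Rn - 1, T = (Rn == 0)
+;;	bf	.Ltop		! branch back while the count is nonzero
+;; where Rn is the loop counter and .Ltop is operand 1.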
+
+;; ------------------------------------------------------------------------
+;; Jump and linkage insns
+;; ------------------------------------------------------------------------
+
+(define_insn "jump_compact"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ "TARGET_SH1 && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
+{
+ /* The length is 16 if the delay slot is unfilled. */
+ if (get_attr_length (insn) > 4)
+ return output_far_jump (insn, operands[0]);
+ else
+ return "bra %l0%#";
+}
+ [(set_attr "type" "jump")
+ (set_attr "needs_delay_slot" "yes")])
+
+;; ??? It would be much saner to explicitly use the scratch register
+;; in the jump insn, and have indirect_jump_scratch only set it,
+;; but fill_simple_delay_slots would refuse to do delay slot filling
+;; from the target then, as it uses simplejump_p.
+;;(define_insn "jump_compact_far"
+;; [(set (pc)
+;; (label_ref (match_operand 0 "" "")))
+;; (use (match_operand 1 "register_operand" "r")]
+;; "TARGET_SH1"
+;; "* return output_far_jump(insn, operands[0], operands[1]);"
+;; [(set_attr "type" "jump")
+;; (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "jump_media"
+ [(set (pc)
+ (match_operand 0 "target_operand" "b"))]
+ "TARGET_SHMEDIA"
+ "blink %0, r63%>"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+{
+ if (TARGET_SH1)
+ emit_jump_insn (gen_jump_compact (operands[0]));
+ else if (TARGET_SHMEDIA)
+ {
+ if (reload_in_progress || reload_completed)
+ FAIL;
+ emit_jump_insn (gen_jump_media (gen_rtx_LABEL_REF (Pmode, operands[0])));
+ }
+ DONE;
+})
+
+(define_insn "force_mode_for_call"
+ [(use (reg:PSI FPSCR_REG))]
+ "TARGET_SHCOMPACT"
+ ""
+ [(set_attr "length" "0")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))])
+
+(define_insn "calli"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH1"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return "jsr/n @%0";
+ else
+ return "jsr @%0%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+;; This is the TBR-relative call instruction for the SH2A architecture.
+;; Its use is enabled by assigning the "function_vector" attribute
+;; and a vector number to a function in its declaration.
+(define_insn "calli_tbr_rel"
+ [(call (mem (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2A && sh2a_is_function_vector_call (operands[0])"
+{
+ unsigned HOST_WIDE_INT vect_num;
+ vect_num = sh2a_get_function_vector_number (operands[0]);
+ operands[2] = GEN_INT (vect_num * 4);
+
+ return "jsr/n @@(%O2,tbr)";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "no")
+ (set_attr "fp_set" "unknown")])
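+
+;; For example (illustrative only): a callee declared with
+;; __attribute__((function_vector(5))) yields vect_num == 5, so operands[2]
+;; becomes 20 and the call is emitted as "jsr/n @@(20,tbr)".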
+
+;; This is a pc-rel call, using bsrf, for use with PIC.
+(define_insn "calli_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (use (match_operand 2 "" ""))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2"
+{
+ return "bsrf %0" "\n"
+ "%O2:%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn_and_split "call_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (match_scratch:SI 2 "=r"))]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+
+ if (SYMBOL_REF_LOCAL_P (operands[0]))
+ emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
+ else
+ emit_insn (gen_symPLT_label2reg (operands[2], operands[0], lab));
+ emit_call_insn (gen_calli_pcrel (operands[2], operands[1], copy_rtx (lab)));
+ DONE;
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
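+
+;; Sketch of the pc-relative call scheme above (register and label names are
+;; illustrative): the split materializes the displacement of the callee from
+;; the local call site into the scratch register, typically via a constant
+;; pool load, and calli_pcrel then emits roughly
+;;	mov.l	.Ldisp,r1	! r1 = callee - call site (PLT entry if not local)
+;;	bsrf	r1		! pc-relative call
+;;	(delay slot insn)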
+
+(define_insn "call_compact"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_compact_rettramp"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_media"
+ [(call (mem:DI (match_operand 0 "target_reg_operand" "b"))
+ (match_operand 1 "" ""))
+ (clobber (reg:DI PR_MEDIA_REG))]
+ "TARGET_SHMEDIA"
+ "blink %0, r18"
+ [(set_attr "type" "jump_media")])
+
+(define_insn "call_valuei"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH1"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0))
+ return "jsr/n @%1";
+ else
+ return "jsr @%1%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+;; This is the TBR-relative call instruction for the SH2A architecture.
+;; Its use is enabled by assigning the "function_vector" attribute
+;; and a vector number to a function in its declaration.
+(define_insn "call_valuei_tbr_rel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2A && sh2a_is_function_vector_call (operands[1])"
+{
+ unsigned HOST_WIDE_INT vect_num;
+ vect_num = sh2a_get_function_vector_number (operands[1]);
+ operands[3] = GEN_INT (vect_num * 4);
+
+ return "jsr/n @@(%O3,tbr)";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "no")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_valuei_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (use (match_operand 3 "" ""))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SH2"
+{
+ return "bsrf %1" "\n"
+ "%O3:%#";
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn_and_split "call_value_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+
+ if (SYMBOL_REF_LOCAL_P (operands[1]))
+ emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
+ else
+ emit_insn (gen_symPLT_label2reg (operands[3], operands[1], lab));
+ emit_call_insn (gen_call_valuei_pcrel (operands[0], operands[3],
+ operands[2], copy_rtx (lab)));
+ DONE;
+}
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")
+ (set_attr "fp_set" "unknown")])
+
+(define_insn "call_value_compact"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_value_compact_rettramp"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_value_media"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:DI (match_operand 1 "target_reg_operand" "b"))
+ (match_operand 2 "" "")))
+ (clobber (reg:DI PR_MEDIA_REG))]
+ "TARGET_SHMEDIA"
+ "blink %1, r18"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "call"
+ [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (match_operand 2 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[0] = shmedia_prepare_call_address (operands[0], 0);
+ emit_call_insn (gen_call_media (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[2] && INTVAL (operands[2]))
+ {
+ rtx cookie_rtx = operands[2];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[0], 0);
+ rtx r0, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[0]
+ = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = force_reg (SImode, operands[0]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_compact_rettramp (operands[0], operands[1],
+ operands[2]));
+ else
+ emit_call_insn (gen_call_compact (operands[0], operands[1],
+ operands[2]));
+
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[0], 0)));
+ XEXP (operands[0], 0) = reg;
+ }
+ if (!flag_pic && TARGET_SH2A
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
+ {
+ if (sh2a_is_function_vector_call (XEXP (operands[0], 0)))
+ {
+ emit_call_insn (gen_calli_tbr_rel (XEXP (operands[0], 0),
+ operands[1]));
+ DONE;
+ }
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
+ {
+ emit_call_insn (gen_call_pcrel (XEXP (operands[0], 0), operands[1]));
+ DONE;
+ }
+ else
+ {
+ operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+ operands[1] = operands[2];
+ }
+
+ emit_call_insn (gen_calli (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "call_pop_compact"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 3 "immediate_operand" "n")))
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_pop_compact_rettramp"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
+ (match_operand 1 "" ""))
+ (match_operand 2 "immediate_operand" "n")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 3 "immediate_operand" "n")))
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[2]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%0%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "call_pop"
+ [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (match_operand 2 "" "")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 3 "" "")))])]
+ "TARGET_SHCOMPACT"
+{
+ rtx cookie_rtx;
+ long cookie;
+ rtx func;
+ rtx r0, r1;
+
+ gcc_assert (operands[2] && INTVAL (operands[2]));
+ cookie_rtx = operands[2];
+ cookie = INTVAL (cookie_rtx);
+ func = XEXP (operands[0], 0);
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[0] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT);
+ operands[0] = force_reg (SImode, operands[0]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_pop_compact_rettramp
+ (operands[0], operands[1], operands[2], operands[3]));
+ else
+ emit_call_insn (gen_call_pop_compact
+ (operands[0], operands[1], operands[2], operands[3]));
+
+ DONE;
+})
+
+(define_expand "call_value"
+ [(parallel [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (match_operand 3 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[1] = shmedia_prepare_call_address (operands[1], 0);
+ emit_call_insn (gen_call_value_media (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3]))
+ {
+ rtx cookie_rtx = operands[3];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[1], 0);
+ rtx r0, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[1]
+ = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = force_reg (SImode, operands[1]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_value_compact_rettramp (operands[0],
+ operands[1],
+ operands[2],
+ operands[3]));
+ else
+ emit_call_insn (gen_call_value_compact (operands[0], operands[1],
+ operands[2], operands[3]));
+
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, XEXP (operands[1], 0)));
+ XEXP (operands[1], 0) = reg;
+ }
+ if (!flag_pic && TARGET_SH2A
+ && MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
+ {
+ if (sh2a_is_function_vector_call (XEXP (operands[1], 0)))
+ {
+ emit_call_insn (gen_call_valuei_tbr_rel (operands[0],
+ XEXP (operands[1], 0), operands[2]));
+ DONE;
+ }
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
+ {
+ emit_call_insn (gen_call_value_pcrel (operands[0], XEXP (operands[1], 0),
+ operands[2]));
+ DONE;
+ }
+ else
+ operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+
+ emit_call_insn (gen_call_valuei (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "sibcalli"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k"))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH1"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcalli_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "k"))
+ (match_operand 1 "" ""))
+ (use (match_operand 2 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH2"
+{
+ return "braf %0" "\n"
+ "%O2:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+;; This uses an unspec to describe that the symbol_ref is very close.
+(define_insn "sibcalli_thunk"
+ [(call (mem:SI (unspec:SI [(match_operand:SI 0 "symbol_ref_operand" "")]
+ UNSPEC_THUNK))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH1"
+ "bra %O0"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump")
+ (set_attr "length" "2")])
+
+(define_insn_and_split "sibcall_pcrel"
+ [(call (mem:SI (match_operand:SI 0 "symbol_ref_operand" ""))
+ (match_operand 1 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_scratch:SI 2 "=k"))
+ (return)]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ emit_insn (gen_sym_label2reg (operands[2], operands[0], lab));
+ call_insn = emit_call_insn (gen_sibcalli_pcrel (operands[2], operands[1],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_compact"
+ [(call (mem:SI (match_operand:SI 0 "register_operand" "k,k"))
+ (match_operand 1 "" ""))
+ (return)
+ (use (match_operand:SI 2 "register_operand" "z,x"))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ ;; We want to make sure the `x' above will only match MACH_REG
+ ;; because sibcall_epilogue may clobber MACL_REG.
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SHCOMPACT"
+{
+ static const char* alt[] =
+ {
+ "jmp @%0%#",
+
+ "jmp @%0" "\n"
+ " sts %2,r0"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "needs_delay_slot" "yes,no")
+ (set_attr "length" "2,4")
+ (set (attr "fp_mode") (const_string "single"))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_media"
+ [(call (mem:DI (match_operand 0 "target_reg_operand" "k"))
+ (match_operand 1 "" ""))
+ (use (reg:SI PR_MEDIA_REG))
+ (return)]
+ "TARGET_SHMEDIA"
+ "blink %0, r63"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "sibcall"
+ [(parallel
+ [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
+ (match_operand 1 "" ""))
+ (match_operand 2 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (return)])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[0] = shmedia_prepare_call_address (operands[0], 1);
+ emit_call_insn (gen_sibcall_media (operands[0], operands[1]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[2]
+ && (INTVAL (operands[2]) & ~ CALL_COOKIE_RET_TRAMP (1)))
+ {
+ rtx cookie_rtx = operands[2];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[0], 0);
+ rtx mach, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ /* FIXME: if we could tell whether all argument registers are
+ already taken, we could decide whether to force the use of
+ MACH_REG or to stick to R0_REG. Unfortunately, there's no
+ simple way to tell. We could use the CALL_COOKIE, but we
+ can't currently tell a register used for regular argument
+ passing from one that is unused. If we leave it up to reload
+ to decide which register to use, it seems to always choose
+ R0_REG, which leaves no available registers in SIBCALL_REGS
+ to hold the address of the trampoline. */
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[0]
+ = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[0] = force_reg (SImode, operands[0]);
+
+ /* We don't need a return trampoline, since the callee will
+ return directly to the upper caller. */
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ {
+ cookie &= ~ CALL_COOKIE_RET_TRAMP (1);
+ cookie_rtx = GEN_INT (cookie);
+ }
+
+ emit_move_insn (mach, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ emit_call_insn (gen_sibcall_compact (operands[0], operands[1], mach));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, XEXP (operands[0], 0)));
+ XEXP (operands[0], 0) = reg;
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[0])
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+ /* The PLT needs the PIC register, but the epilogue would have
+ to restore it, so we can only use PC-relative PIC calls for
+ static functions. */
+ && SYMBOL_REF_LOCAL_P (XEXP (operands[0], 0)))
+ {
+ emit_call_insn (gen_sibcall_pcrel (XEXP (operands[0], 0), operands[1]));
+ DONE;
+ }
+ else
+ operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+
+ emit_call_insn (gen_sibcalli (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "sibcall_valuei"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k"))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH1"
+ "jmp @%1%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_valuei_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "k"))
+ (match_operand 2 "" "")))
+ (use (match_operand 3 "" ""))
+ (use (reg:PSI FPSCR_REG))
+ (return)]
+ "TARGET_SH2"
+{
+ return "braf %1" "\n"
+ "%O3:%#";
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn_and_split "sibcall_value_pcrel"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" ""))
+ (match_operand 2 "" "")))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (match_scratch:SI 3 "=k"))
+ (return)]
+ "TARGET_SH2"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ rtx lab = PATTERN (gen_call_site ());
+ rtx call_insn;
+
+ emit_insn (gen_sym_label2reg (operands[3], operands[1], lab));
+ call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0],
+ operands[3],
+ operands[2],
+ copy_rtx (lab)));
+ SIBLING_CALL_P (call_insn) = 1;
+ DONE;
+}
+ [(set_attr "needs_delay_slot" "yes")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_value_compact"
+ [(set (match_operand 0 "" "=rf,rf")
+ (call (mem:SI (match_operand:SI 1 "register_operand" "k,k"))
+ (match_operand 2 "" "")))
+ (return)
+ (use (match_operand:SI 3 "register_operand" "z,x"))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ ;; We want to make sure the `x' above will only match MACH_REG
+ ;; because sibcall_epilogue may clobber MACL_REG.
+ (clobber (reg:SI MACL_REG))]
+ "TARGET_SHCOMPACT"
+{
+ static const char* alt[] =
+ {
+ "jmp @%1%#",
+
+ "jmp @%1" "\n"
+ " sts %3,r0"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "needs_delay_slot" "yes,no")
+ (set_attr "length" "2,4")
+ (set (attr "fp_mode") (const_string "single"))
+ (set_attr "type" "jump_ind")])
+
+(define_insn "sibcall_value_media"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:DI (match_operand 1 "target_reg_operand" "k"))
+ (match_operand 2 "" "")))
+ (use (reg:SI PR_MEDIA_REG))
+ (return)]
+ "TARGET_SHMEDIA"
+ "blink %1, r63"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "sibcall_value"
+ [(parallel
+ [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (match_operand 3 "" "")
+ (use (reg:PSI FPSCR_REG))
+ (return)])]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ operands[1] = shmedia_prepare_call_address (operands[1], 1);
+ emit_call_insn (gen_sibcall_value_media (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && operands[3]
+ && (INTVAL (operands[3]) & ~ CALL_COOKIE_RET_TRAMP (1)))
+ {
+ rtx cookie_rtx = operands[3];
+ long cookie = INTVAL (cookie_rtx);
+ rtx func = XEXP (operands[1], 0);
+ rtx mach, r1;
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ /* FIXME: if we could tell whether all argument registers are
+ already taken, we could decide whether to force the use of
+ MACH_REG or to stick to R0_REG. Unfortunately, there's no
+ simple way to tell. We could use the CALL_COOKIE, but we
+ can't currently tell a register used for regular argument
+ passing from one that is unused. If we leave it up to reload
+ to decide which register to use, it seems to always choose
+ R0_REG, which leaves no available registers in SIBCALL_REGS
+ to hold the address of the trampoline. */
+ mach = gen_rtx_REG (SImode, MACH_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[1]
+ = function_symbol (NULL, "__GCC_shcompact_call_trampoline", SFUNC_GOT);
+ operands[1] = force_reg (SImode, operands[1]);
+
+ /* We don't need a return trampoline, since the callee will
+ return directly to the upper caller. */
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ {
+ cookie &= ~ CALL_COOKIE_RET_TRAMP (1);
+ cookie_rtx = GEN_INT (cookie);
+ }
+
+ emit_move_insn (mach, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ emit_call_insn (gen_sibcall_value_compact (operands[0], operands[1],
+ operands[2], mach));
+ DONE;
+ }
+ else if (TARGET_SHCOMPACT && flag_pic
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && ! SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOT2reg (reg, XEXP (operands[1], 0)));
+ XEXP (operands[1], 0) = reg;
+ }
+ if (flag_pic && TARGET_SH2
+ && MEM_P (operands[1])
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ /* The PLT needs the PIC register, but the epilogue would have
+ to restore it, so we can only use PC-relative PIC calls for
+ static functions. */
+ && SYMBOL_REF_LOCAL_P (XEXP (operands[1], 0)))
+ {
+ emit_call_insn (gen_sibcall_value_pcrel (operands[0],
+ XEXP (operands[1], 0),
+ operands[2]));
+ DONE;
+ }
+ else
+ operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+
+ emit_call_insn (gen_sibcall_valuei (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "call_value_pop_compact"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 4 "immediate_operand" "n")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && ! (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "call_value_pop_compact_rettramp"
+ [(set (match_operand 0 "" "=rf")
+ (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand 2 "" "")))
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 4 "immediate_operand" "n")))
+ (match_operand 3 "immediate_operand" "n")
+ (use (reg:SI R0_REG))
+ (use (reg:SI R1_REG))
+ (use (reg:PSI FPSCR_REG))
+ (clobber (reg:SI R10_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT && (INTVAL (operands[3]) & CALL_COOKIE_RET_TRAMP (1))"
+ "jsr @%1%#"
+ [(set_attr "type" "call")
+ (set (attr "fp_mode")
+ (if_then_else (eq_attr "fpu_single" "yes")
+ (const_string "single") (const_string "double")))
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_expand "call_value_pop"
+ [(parallel [(set (match_operand 0 "arith_reg_operand" "")
+ (call (mem:SI (match_operand 1 "arith_reg_operand" ""))
+ (match_operand 2 "" "")))
+ (match_operand 3 "" "")
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG)
+ (match_operand 4 "" "")))])]
+ "TARGET_SHCOMPACT"
+{
+ rtx cookie_rtx;
+ long cookie;
+ rtx func;
+ rtx r0, r1;
+
+ gcc_assert (TARGET_SHCOMPACT && operands[3] && INTVAL (operands[3]));
+ cookie_rtx = operands[3];
+ cookie = INTVAL (cookie_rtx);
+ func = XEXP (operands[1], 0);
+
+ if (flag_pic)
+ {
+ if (GET_CODE (func) == SYMBOL_REF && ! SYMBOL_REF_LOCAL_P (func))
+ {
+ rtx reg = gen_reg_rtx (Pmode);
+
+ emit_insn (gen_symGOTPLT2reg (reg, func));
+ func = reg;
+ }
+ else
+ func = legitimize_pic_address (func, Pmode, 0);
+ }
+
+ r0 = gen_rtx_REG (SImode, R0_REG);
+ r1 = gen_rtx_REG (SImode, R1_REG);
+
+ /* Since such a call function may use all call-clobbered
+ registers, we force a mode switch earlier, so that we don't
+ run out of registers when adjusting fpscr for the call. */
+ emit_insn (gen_force_mode_for_call ());
+
+ operands[1] = function_symbol (NULL, "__GCC_shcompact_call_trampoline",
+ SFUNC_GOT);
+ operands[1] = force_reg (SImode, operands[1]);
+
+ emit_move_insn (r0, func);
+ emit_move_insn (r1, cookie_rtx);
+
+ if (cookie & CALL_COOKIE_RET_TRAMP (1))
+ emit_call_insn (gen_call_value_pop_compact_rettramp
+ (operands[0], operands[1], operands[2],
+ operands[3], operands[4]));
+ else
+ emit_call_insn (gen_call_value_pop_compact
+ (operands[0], operands[1], operands[2],
+ operands[3], operands[4]));
+
+ DONE;
+})
+
+(define_expand "sibcall_epilogue"
+ [(return)]
+ ""
+{
+ sh_expand_epilogue (true);
+ if (TARGET_SHCOMPACT)
+ {
+ rtx insn, set;
+
+ /* If epilogue clobbers r0, preserve it in macl. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if ((set = single_set (insn))
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) == R0_REG)
+ {
+ rtx r0 = gen_rtx_REG (SImode, R0_REG);
+ rtx tmp = gen_rtx_REG (SImode, MACL_REG);
+
+ /* We can't tell at this point whether the sibcall is a
+ sibcall_compact and, if it is, whether it uses r0 or
+ mach as operand 2, so let the instructions that
+ preserve r0 be optimized away if r0 turns out to be
+ dead. */
+ emit_insn_before (gen_rtx_SET (SImode, tmp, r0), insn);
+ emit_move_insn (r0, tmp);
+ break;
+ }
+ }
+ DONE;
+})
+
+(define_insn "indirect_jump_compact"
+ [(set (pc)
+ (match_operand:SI 0 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+(define_expand "indirect_jump"
+ [(set (pc)
+ (match_operand 0 "register_operand" ""))]
+ ""
+{
+ if (GET_MODE (operands[0]) != Pmode)
+ operands[0] = gen_rtx_SUBREG (Pmode, operands[0], 0);
+})
+
+;; The use of operand 1 / 2 helps us distinguish case table jumps, which can
+;; be present in structured code, from indirect jumps, which cannot be.
+;; This allows -fprofile-arcs to work.
+
+;; For SH1 processors.
+(define_insn "casesi_jump_1"
+ [(set (pc)
+ (match_operand:SI 0 "register_operand" "r"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_SH1"
+ "jmp @%0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+;; For all later processors.
+(define_insn "casesi_jump_2"
+ [(set (pc) (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (label_ref (match_operand 1 "" ""))))
+ (use (label_ref (match_operand 2 "" "")))]
+ "TARGET_SH2
+ && (! INSN_UID (operands[1]) || prev_real_insn (operands[1]) == insn)"
+ "braf %0%#"
+ [(set_attr "needs_delay_slot" "yes")
+ (set_attr "type" "jump_ind")])
+
+(define_insn "casesi_jump_media"
+ [(set (pc) (match_operand 0 "target_reg_operand" "b"))
+ (use (label_ref (match_operand 1 "" "")))]
+ "TARGET_SHMEDIA"
+ "blink %0, r63"
+ [(set_attr "type" "jump_media")])
+
+;; Call subroutine returning any type.
+;; ??? This probably doesn't work.
+(define_expand "untyped_call"
+ [(parallel [(call (match_operand 0 "" "")
+ (const_int 0))
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")])]
+ "(TARGET_SH2E || TARGET_SH2A) || TARGET_SHMEDIA"
+{
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
+
+ for (int i = 0; i < XVECLEN (operands[2], 0); i++)
+ {
+ rtx set = XVECEXP (operands[2], 0, i);
+ emit_move_insn (SET_DEST (set), SET_SRC (set));
+ }
+
+ /* The optimizer does not know that the call sets the function value
+ registers we stored in the result block. We avoid problems by
+ claiming that all hard registers are used and clobbered at this
+ point. */
+ emit_insn (gen_blockage ());
+
+ DONE;
+})
+
+;; ------------------------------------------------------------------------
+;; Misc insns
+;; ------------------------------------------------------------------------
+
+(define_insn "dect"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SI 1 "arith_reg_dest" "0") (const_int 1)))
+ (set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (plus:SI (match_dup 1) (const_int -1)))]
+ "TARGET_SH2"
+ "dt %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "nop")
+
+;; Load address of a label. This is only generated by the casesi expand,
+;; and by machine_dependent_reorg (fixing up fp moves).
+;; This must use unspec, because this only works for labels that are
+;; within range.
+(define_insn "mova"
+ [(set (reg:SI R0_REG)
+ (unspec:SI [(label_ref (match_operand 0 "" ""))] UNSPEC_MOVA))]
+ "TARGET_SH1"
+ "mova %O0,r0"
+ [(set_attr "in_delay_slot" "no")
+ (set_attr "type" "arith")])
+
+;; machine_dependent_reorg will make this a `mova'.
+(define_insn "mova_const"
+ [(set (reg:SI R0_REG)
+ (unspec:SI [(match_operand 0 "immediate_operand" "i")] UNSPEC_MOVA))]
+ "TARGET_SH1"
+ "#"
+ [(set_attr "in_delay_slot" "no")
+ (set_attr "type" "arith")])
+
+(define_expand "GOTaddr2picreg"
+ [(set (reg:SI R0_REG)
+ (unspec:SI [(const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC))]
+ UNSPEC_MOVA))
+ (set (match_dup 0) (const:SI (unspec:SI [(match_dup 1)] UNSPEC_PIC)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))]
+ ""
+{
+ if (TARGET_VXWORKS_RTP)
+ {
+ rtx gott_base = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
+ rtx gott_index = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
+ emit_insn (gen_vxworks_picreg (gott_base, gott_index));
+ DONE;
+ }
+
+ operands[0] = gen_rtx_REG (Pmode, PIC_REG);
+ operands[1] = gen_rtx_SYMBOL_REF (VOIDmode, GOT_SYMBOL_NAME);
+
+ if (TARGET_SHMEDIA)
+ {
+ rtx tr = gen_rtx_REG (Pmode, TR0_REG);
+ rtx pic = operands[0];
+ rtx lab = PATTERN (gen_call_site ());
+ rtx insn, equiv;
+
+ equiv = operands[1];
+ operands[1] = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[1], lab),
+ UNSPEC_PCREL_SYMOFF);
+ operands[1] = gen_rtx_CONST (Pmode, operands[1]);
+
+ if (Pmode == SImode)
+ {
+ emit_insn (gen_movsi_const (pic, operands[1]));
+ emit_insn (gen_ptrel_si (tr, pic, copy_rtx (lab)));
+ }
+ else
+ {
+ emit_insn (gen_movdi_const (pic, operands[1]));
+ emit_insn (gen_ptrel_di (tr, pic, copy_rtx (lab)));
+ }
+
+ insn = emit_move_insn (operands[0], tr);
+
+ set_unique_reg_note (insn, REG_EQUAL, equiv);
+
+ DONE;
+ }
+})
+
+;; A helper for GOTaddr2picreg to finish up the initialization of the
+;; PIC register.
+(define_expand "vxworks_picreg"
+ [(set (reg:SI PIC_REG)
+ (const:SI (unspec:SI [(match_operand:SI 0 "" "")] UNSPEC_PIC)))
+ (set (reg:SI R0_REG)
+ (const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PIC)))
+ (set (reg:SI PIC_REG)
+ (mem:SI (reg:SI PIC_REG)))
+ (set (reg:SI PIC_REG)
+ (mem:SI (plus:SI (reg:SI PIC_REG)
+ (reg:SI R0_REG))))]
+ "TARGET_VXWORKS_RTP")
+
+(define_insn "*ptb"
+ [(set (match_operand 0 "target_reg_operand" "=b")
+ (const (unspec [(match_operand 1 "" "Csy")]
+ UNSPEC_DATALABEL)))]
+ "TARGET_SHMEDIA && flag_pic
+ && satisfies_constraint_Csy (operands[1])"
+ "ptb/u datalabel %1, %0"
+ [(set_attr "type" "ptabs_media")
+ (set_attr "length" "*")])
+
+(define_insn "ptrel_si"
+ [(set (match_operand:SI 0 "target_reg_operand" "=b")
+ (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (pc)))
+ (match_operand:SI 2 "" "")]
+ "TARGET_SHMEDIA"
+ "%O2: ptrel/u %1, %0"
+ [(set_attr "type" "ptabs_media")])
+
+(define_insn "ptrel_di"
+ [(set (match_operand:DI 0 "target_reg_operand" "=b")
+ (plus:DI (match_operand:DI 1 "register_operand" "r")
+ (pc)))
+ (match_operand:DI 2 "" "")]
+ "TARGET_SHMEDIA"
+ "%O2: ptrel/u %1, %0"
+ [(set_attr "type" "ptabs_media")])
+
+(define_expand "builtin_setjmp_receiver"
+ [(match_operand 0 "" "")]
+ "flag_pic"
+{
+ emit_insn (gen_GOTaddr2picreg ());
+ DONE;
+})
+
+(define_expand "call_site"
+ [(unspec [(match_dup 0)] UNSPEC_CALLER)]
+ "TARGET_SH1"
+{
+ static HOST_WIDE_INT i = 0;
+ operands[0] = GEN_INT (i);
+ i++;
+})
+
+;; op0 = op1 + r12 but hide it before reload completed. See the comment
+;; in symGOT_load expand.
+(define_insn_and_split "chk_guard_add"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "r")
+ (reg:SI PIC_REG)]
+ UNSPEC_CHKADD))]
+ "TARGET_SH1"
+ "#"
+ "TARGET_SH1 && reload_completed"
+ [(set (match_dup 0) (reg:SI PIC_REG))
+ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))]
+ ""
+ [(set_attr "type" "arith")])
+
+(define_expand "sym_label2reg"
+ [(set (match_operand:SI 0 "" "")
+ (const:SI (unspec:SI [(match_operand:SI 1 "" "")
+ (const (plus:SI (match_operand:SI 2 "" "")
+ (const_int 2)))]
+ UNSPEC_SYMOFF)))]
+ "TARGET_SH1" "")
+
+(define_expand "symGOT_load"
+ [(set (match_dup 2) (match_operand 1 "" ""))
+ (set (match_dup 3) (plus (match_dup 2) (reg PIC_REG)))
+ (set (match_operand 0 "" "") (mem (match_dup 3)))]
+ ""
+{
+ rtx mem;
+
+ operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+ operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
+
+ if (TARGET_SHMEDIA)
+ {
+ rtx reg = operands[2];
+
+ if (Pmode == DImode)
+ {
+ if (flag_pic > 1)
+ emit_insn (gen_movdi_const_32bit (reg, operands[1]));
+ else
+ emit_insn (gen_movdi_const_16bit (reg, operands[1]));
+ }
+ else
+ {
+ if (flag_pic > 1)
+ emit_insn (gen_movsi_const (reg, operands[1]));
+ else
+ emit_insn (gen_movsi_const_16bit (reg, operands[1]));
+ }
+ }
+ else
+ emit_move_insn (operands[2], operands[1]);
+
+  /* When the stack protector inserts code after the result is set to
+     R0, @(rX, r12) will cause a spill failure for R0. Use an unspec
+ insn to avoid combining (set A (plus rX r12)) and (set op0 (mem A))
+ when rX is a GOT address for the guard symbol. Ugly but doesn't
+ matter because this is a rare situation. */
+ if (!TARGET_SHMEDIA
+ && flag_stack_protect
+ && GET_CODE (operands[1]) == CONST
+ && GET_CODE (XEXP (operands[1], 0)) == UNSPEC
+ && GET_CODE (XVECEXP (XEXP (operands[1], 0), 0, 0)) == SYMBOL_REF
+ && strcmp (XSTR (XVECEXP (XEXP (operands[1], 0), 0, 0), 0),
+ "__stack_chk_guard") == 0)
+ emit_insn (gen_chk_guard_add (operands[3], operands[2]));
+ else
+ emit_move_insn (operands[3], gen_rtx_PLUS (Pmode, operands[2],
+ gen_rtx_REG (Pmode, PIC_REG)));
+
+ /* N.B. This is not constant for a GOTPLT relocation. */
+ mem = gen_rtx_MEM (Pmode, operands[3]);
+ MEM_NOTRAP_P (mem) = 1;
+ /* ??? Should we have a special alias set for the GOT? */
+ emit_move_insn (operands[0], mem);
+
+ DONE;
+})
+
+(define_expand "sym2GOT"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOT))]
+ ""
+ "")
+
+(define_expand "symGOT2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+{
+ rtx gotsym, insn;
+
+ gotsym = gen_sym2GOT (operands[1]);
+ PUT_MODE (gotsym, Pmode);
+ insn = emit_insn (gen_symGOT_load (operands[0], gotsym));
+
+ MEM_READONLY_P (SET_SRC (PATTERN (insn))) = 1;
+
+ DONE;
+})
+
+(define_expand "symGOTPLT2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+{
+ rtx pltsym = gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, operands[1]),
+ UNSPEC_GOTPLT));
+ emit_insn (gen_symGOT_load (operands[0], pltsym));
+ DONE;
+})
+
+(define_expand "sym2GOTOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTOFF))]
+ ""
+ "")
+
+(define_expand "symGOTOFF2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+{
+ rtx gotoffsym, insn;
+ rtx t = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0])));
+
+ gotoffsym = gen_sym2GOTOFF (operands[1]);
+ PUT_MODE (gotoffsym, Pmode);
+ emit_move_insn (t, gotoffsym);
+ insn = emit_move_insn (operands[0],
+ gen_rtx_PLUS (Pmode, t,
+ gen_rtx_REG (Pmode, PIC_REG)));
+
+ set_unique_reg_note (insn, REG_EQUAL, operands[1]);
+
+ DONE;
+})
+
+(define_expand "symPLT_label2reg"
+ [(set (match_operand:SI 0 "" "")
+ (const:SI
+ (unspec:SI
+ [(const:SI (unspec:SI [(match_operand:SI 1 "" "")] UNSPEC_PLT))
+ (const:SI (plus:SI (match_operand:SI 2 "" "")
+ (const_int 2)))] UNSPEC_PCREL_SYMOFF)))
+ ;; Even though the PIC register is not really used by the call
+ ;; sequence in which this is expanded, the PLT code assumes the PIC
+ ;; register is set, so we must not skip its initialization. Since
+ ;; we only use this expand as part of calling sequences, and never
+ ;; to take the address of a function, this is the best point to
+ ;; insert the (use). Using the PLT to take the address of a
+ ;; function would be wrong, not only because the PLT entry could
+ ;; then be called from a function that doesn't initialize the PIC
+ ;; register to the proper GOT, but also because pointers to the
+ ;; same function might not compare equal, should they be set by
+ ;; different shared libraries.
+ (use (reg:SI PIC_REG))]
+ "TARGET_SH1"
+ "")
+
+(define_expand "sym2PIC"
+ [(const (unspec [(match_operand:SI 0 "" "")] UNSPEC_PIC))]
+ ""
+ "")
+
+;; -------------------------------------------------------------------------
+;; TLS code generation.
+
+;; FIXME: The multi-insn asm blocks should be converted to use
+;; define_insn_and_split.
+;; See the thread [PATCH/RFA] SH TLS support on gcc-patches
+;; <http://gcc.gnu.org/ml/gcc-patches/2003-02/msg01898.html>
+;; for details.
+
+(define_insn "tls_global_dynamic"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSGD))
+ (const_int 0)))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (scratch:SI))]
+ "TARGET_SH1"
+{
+ return "mov.l 1f,r4" "\n"
+ " mova 2f,r0" "\n"
+ " mov.l 2f,r1" "\n"
+ " add r0,r1" "\n"
+ " jsr @r1" "\n"
+ " add r12,r4" "\n"
+ " bra 3f" "\n"
+ " nop" "\n"
+ " .align 2" "\n"
+ "1: .long %a1@TLSGD" "\n"
+ "2: .long __tls_get_addr@PLT" "\n"
+ "3:";
+}
+ [(set_attr "type" "tls_load")
+ (set_attr "length" "26")])
+
+(define_insn "tls_local_dynamic"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (call:SI (mem:SI (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSLDM))
+ (const_int 0)))
+ (use (reg:PSI FPSCR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (scratch:SI))]
+ "TARGET_SH1"
+{
+ return "mov.l 1f,r4" "\n"
+ " mova 2f,r0" "\n"
+ " mov.l 2f,r1" "\n"
+ " add r0,r1" "\n"
+ " jsr @r1" "\n"
+ " add r12,r4" "\n"
+ " bra 3f" "\n"
+ " nop" "\n"
+ " .align 2" "\n"
+ "1: .long %a1@TLSLDM" "\n"
+ "2: .long __tls_get_addr@PLT" "\n"
+ "3:";
+}
+ [(set_attr "type" "tls_load")
+ (set_attr "length" "26")])
+
+(define_expand "sym2DTPOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_DTPOFF))]
+ ""
+ "")
+
+(define_expand "symDTPOFF2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "") (match_operand 2 "" "")]
+ ""
+{
+ rtx dtpoffsym;
+ rtx t = (!can_create_pseudo_p ()
+ ? operands[0]
+ : gen_reg_rtx (GET_MODE (operands[0])));
+
+ dtpoffsym = gen_sym2DTPOFF (operands[1]);
+ PUT_MODE (dtpoffsym, Pmode);
+ emit_move_insn (t, dtpoffsym);
+ emit_move_insn (operands[0], gen_rtx_PLUS (Pmode, t, operands[2]));
+ DONE;
+})
+
+(define_expand "sym2GOTTPOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_GOTTPOFF))]
+ ""
+ "")
+
+(define_insn "tls_initial_exec"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "" "")]
+ UNSPEC_TLSIE))
+ (use (reg:SI GBR_REG))
+ (use (reg:SI PIC_REG))
+ (clobber (reg:SI R0_REG))]
+ ""
+{
+ return "mov.l 1f,r0" "\n"
+ " stc gbr,%0" "\n"
+ " mov.l @(r0,r12),r0" "\n"
+ " bra 2f" "\n"
+ " add r0,%0" "\n"
+ " .align 2" "\n"
+ "1: .long %a1" "\n"
+ "2:";
+}
+ [(set_attr "type" "tls_load")
+ (set_attr "length" "16")])
+
+(define_expand "sym2TPOFF"
+ [(const (unspec [(match_operand 0 "" "")] UNSPEC_TPOFF))]
+ ""
+ "")
+
+(define_expand "symTPOFF2reg"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+{
+ rtx tpoffsym;
+
+ tpoffsym = gen_sym2TPOFF (operands[1]);
+ PUT_MODE (tpoffsym, Pmode);
+ emit_move_insn (operands[0], tpoffsym);
+ DONE;
+})
+
+;;------------------------------------------------------------------------------
+;; Thread pointer getter and setter.
+;;
+;; On SH the thread pointer is kept in the GBR.
+;; These patterns are usually expanded from the respective built-in functions.
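+;; For illustration, a minimal sketch of the expected mapping (assuming the
+;; generic thread pointer built-in functions are available):
+;;    void *tp = __builtin_thread_pointer ();    -> stc gbr,Rn
+;;    __builtin_set_thread_pointer (p);          -> ldc Rn,gbr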
+(define_expand "get_thread_pointersi"
+ [(set (match_operand:SI 0 "register_operand") (reg:SI GBR_REG))]
+ "TARGET_SH1")
+
+;; The store_gbr insn can also be used on !TARGET_SH1 for doing TLS accesses.
+(define_insn "store_gbr"
+ [(set (match_operand:SI 0 "register_operand" "=r") (reg:SI GBR_REG))]
+ ""
+ "stc gbr,%0"
+ [(set_attr "type" "tls_load")])
+
+(define_expand "set_thread_pointersi"
+ [(set (reg:SI GBR_REG)
+ (unspec_volatile:SI [(match_operand:SI 0 "register_operand")]
+ UNSPECV_GBR))]
+ "TARGET_SH1")
+
+(define_insn "load_gbr"
+ [(set (reg:SI GBR_REG)
+ (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_GBR))]
+ "TARGET_SH1"
+ "ldc %0,gbr"
+ [(set_attr "type" "move")])
+
+;;------------------------------------------------------------------------------
+;; Thread pointer relative memory loads and stores.
+;;
+;; On SH there are GBR displacement address modes which can be utilized to
+;; access memory behind the thread pointer.
+;; Since we do not allow using GBR for general purpose memory accesses, these
+;; GBR addressing modes are formed by the combine pass.
+;; This could be done with fewer patterns than below by using a mem predicate
+;; for the GBR mem, but then reload would try to reload addresses with a
+;; zero displacement for some strange reason.
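+;;
+;; A minimal sketch of the kind of access these patterns are meant to pick up
+;; (assuming a local-exec __thread variable that ends up at a small offset
+;; from the thread pointer):
+;;    __thread int counter;
+;;    void f (void) { counter++; }
+;; which combine can turn into GBR displacement addressing along the lines of
+;;    mov.l   @(disp,gbr),r0
+;;    add     #1,r0
+;;    mov.l   r0,@(disp,gbr)
+;; instead of first materializing the full address in a register.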
+
+(define_insn "*mov<mode>_gbr_load"
+ [(set (match_operand:QIHISI 0 "register_operand" "=z")
+ (mem:QIHISI (plus:SI (reg:SI GBR_REG)
+ (match_operand:QIHISI 1 "gbr_displacement"))))]
+ "TARGET_SH1"
+ "mov.<bwl> @(%O1,gbr),%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*mov<mode>_gbr_load"
+ [(set (match_operand:QIHISI 0 "register_operand" "=z")
+ (mem:QIHISI (reg:SI GBR_REG)))]
+ "TARGET_SH1"
+ "mov.<bwl> @(0,gbr),%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*mov<mode>_gbr_load"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extend:SI
+ (mem:QIHI (plus:SI (reg:SI GBR_REG)
+ (match_operand:QIHI 1 "gbr_displacement")))))]
+ "TARGET_SH1"
+ "mov.<bw> @(%O1,gbr),%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*mov<mode>_gbr_load"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (sign_extend:SI (mem:QIHI (reg:SI GBR_REG))))]
+ "TARGET_SH1"
+ "mov.<bw> @(0,gbr),%0"
+ [(set_attr "type" "load")])
+
+(define_insn "*mov<mode>_gbr_store"
+ [(set (mem:QIHISI (plus:SI (reg:SI GBR_REG)
+ (match_operand:QIHISI 0 "gbr_displacement")))
+ (match_operand:QIHISI 1 "register_operand" "z"))]
+ "TARGET_SH1"
+ "mov.<bwl> %1,@(%O0,gbr)"
+ [(set_attr "type" "store")])
+
+(define_insn "*mov<mode>_gbr_store"
+ [(set (mem:QIHISI (reg:SI GBR_REG))
+ (match_operand:QIHISI 0 "register_operand" "z"))]
+ "TARGET_SH1"
+ "mov.<bwl> %0,@(0,gbr)"
+ [(set_attr "type" "store")])
+
+;; DImode memory accesses have to be split in two SImode accesses.
+;; Split them before reload, so that it gets a better chance to figure out
+;; how to deal with the R0 restriction for the individual SImode accesses.
+;; Do not match this insn during or after reload because it can't be split
+;; afterwards.
+(define_insn_and_split "*movdi_gbr_load"
+ [(set (match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "gbr_address_mem"))]
+ "TARGET_SH1 && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 4) (match_dup 6))]
+{
+ /* Swap low/high part load order on little endian, so that the result reg
+ of the second load can be used better. */
+ int off = TARGET_LITTLE_ENDIAN ? 1 : 0;
+ operands[3 + off] = gen_lowpart (SImode, operands[0]);
+ operands[5 + off] = gen_lowpart (SImode, operands[1]);
+ operands[4 - off] = gen_highpart (SImode, operands[0]);
+ operands[6 - off] = gen_highpart (SImode, operands[1]);
+})
+
+(define_insn_and_split "*movdi_gbr_store"
+ [(set (match_operand:DI 0 "gbr_address_mem")
+ (match_operand:DI 1 "register_operand"))]
+ "TARGET_SH1 && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 4) (match_dup 6))]
+{
+ /* Swap low/high part store order on big endian, so that stores of function
+ call results can save a reg copy. */
+ int off = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ operands[3 + off] = gen_lowpart (SImode, operands[0]);
+ operands[5 + off] = gen_lowpart (SImode, operands[1]);
+ operands[4 - off] = gen_highpart (SImode, operands[0]);
+ operands[6 - off] = gen_highpart (SImode, operands[1]);
+})
+
+;; Sometimes memory accesses do not get combined with the store_gbr insn,
+;; in particular when the displacements are in the range of the regular move
+;; insns. Thus, in the first split pass after the combine pass we search
+;; for missed opportunities and try to fix them up ourselves.
+;; If an equivalent GBR address can be determined the load / store is split
+;; into one of the GBR load / store patterns.
+;; All of that must happen before reload (GBR address modes use R0 as the
+;; other operand) and there's no point in doing it if the GBR is not
+;; referenced in a function at all.
+(define_split
+ [(set (match_operand:QIHISIDI 0 "register_operand")
+ (match_operand:QIHISIDI 1 "memory_operand"))]
+ "TARGET_SH1 && !reload_in_progress && !reload_completed
+ && df_regs_ever_live_p (GBR_REG)"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]);
+ if (gbr_mem != NULL_RTX)
+ operands[1] = replace_equiv_address (operands[1], gbr_mem);
+ else
+ FAIL;
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:QIHI 1 "memory_operand")))]
+ "TARGET_SH1 && !reload_in_progress && !reload_completed
+ && df_regs_ever_live_p (GBR_REG)"
+ [(set (match_dup 0) (sign_extend:SI (match_dup 1)))]
+{
+ rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]);
+ if (gbr_mem != NULL_RTX)
+ operands[1] = replace_equiv_address (operands[1], gbr_mem);
+ else
+ FAIL;
+})
+
+;; On SH2A we've got movu.b and movu.w for doing zero-extending mem loads.
+;; Split those so that a GBR load can be used.
+(define_split
+ [(set (match_operand:SI 0 "register_operand")
+ (zero_extend:SI (match_operand:QIHI 1 "memory_operand")))]
+ "TARGET_SH2A && !reload_in_progress && !reload_completed
+ && df_regs_ever_live_p (GBR_REG)"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (zero_extend:SI (match_dup 2)))]
+{
+ rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]);
+ if (gbr_mem != NULL_RTX)
+ {
+ operands[2] = gen_reg_rtx (GET_MODE (operands[1]));
+ operands[1] = replace_equiv_address (operands[1], gbr_mem);
+ }
+ else
+ FAIL;
+})
+
+(define_split
+ [(set (match_operand:QIHISIDI 0 "memory_operand")
+ (match_operand:QIHISIDI 1 "register_operand"))]
+ "TARGET_SH1 && !reload_in_progress && !reload_completed
+ && df_regs_ever_live_p (GBR_REG)"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[0]);
+ if (gbr_mem != NULL_RTX)
+ operands[0] = replace_equiv_address (operands[0], gbr_mem);
+ else
+ FAIL;
+})
+
+;;------------------------------------------------------------------------------
+;; case instruction for switch statements.
+
+;; operand 0 is index
+;; operand 1 is the minimum bound
+;; operand 2 is the maximum bound - minimum bound + 1
+;; operand 3 is CODE_LABEL for the table;
+;; operand 4 is the CODE_LABEL to go to if index out of range.
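+;;
+;; For illustration, a dense switch such as (a sketch)
+;;    switch (i) { case 0: ... case 1: ... case 2: ... case 3: ... }
+;; is expanded here into a range check on the index (branching to operand 4
+;; when out of range) followed by a jump-table lookup relative to operand 3.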
+(define_expand "casesi"
+ [(match_operand:SI 0 "arith_reg_operand" "")
+ (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")
+ (match_operand 3 "" "") (match_operand 4 "" "")]
+ ""
+{
+ rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (SImode);
+ if (TARGET_SHMEDIA)
+ {
+ rtx reg = gen_reg_rtx (DImode);
+ rtx reg2 = gen_reg_rtx (DImode);
+ rtx reg3 = gen_reg_rtx (Pmode);
+ rtx reg4 = gen_reg_rtx (Pmode);
+ rtx reg5 = gen_reg_rtx (Pmode);
+ rtx load, test;
+
+ operands[0] = convert_modes (DImode, SImode, operands[0], 0);
+ operands[1] = convert_modes (DImode, SImode, operands[1], 0);
+ operands[2] = convert_modes (DImode, SImode, operands[2], 1);
+
+ test = gen_rtx_GT (VOIDmode, operands[1], operands[0]);
+ emit_jump_insn (gen_cbranchdi4 (test, operands[1], operands[0],
+ operands[4]));
+ emit_move_insn (reg, gen_rtx_MINUS (DImode, operands[0], operands[1]));
+ test = gen_rtx_GTU (VOIDmode, reg, operands[2]);
+ emit_jump_insn (gen_cbranchdi4 (test, reg, operands[2], operands[4]));
+ emit_insn (gen_casesi_shift_media (reg2, reg, operands[3]));
+ emit_move_insn (reg3, gen_datalabel_ref (gen_rtx_LABEL_REF
+ (Pmode, operands[3])));
+ /* Messy: can we subreg to clean this up? */
+ if (Pmode == DImode)
+ load = gen_casesi_load_media (reg4, reg3, reg2, operands[3]);
+ else
+ load = gen_casesi_load_media (reg4,
+ gen_rtx_SUBREG (DImode, reg3, 0),
+ reg2, operands[3]);
+ PUT_MODE (SET_SRC (load), Pmode);
+ emit_insn (load);
+ /* ??? The following add could be eliminated if we used ptrel. */
+ emit_move_insn (reg5, gen_rtx_PLUS (Pmode, reg3, reg4));
+ emit_jump_insn (gen_casesi_jump_media (reg5, operands[3]));
+ emit_barrier ();
+ DONE;
+ }
+ operands[1] = copy_to_mode_reg (SImode, operands[1]);
+ operands[2] = copy_to_mode_reg (SImode, operands[2]);
+ /* If optimizing, casesi_worker depends on the mode of the instruction
+     before the label it 'uses' - operands[3]. */
+ emit_insn (gen_casesi_0 (operands[0], operands[1], operands[2], operands[4],
+ reg));
+ emit_insn (gen_casesi_worker_0 (reg2, reg, operands[3]));
+ if (TARGET_SH2)
+ emit_jump_insn (gen_casesi_jump_2 (reg2, gen_label_rtx (), operands[3]));
+ else
+ emit_jump_insn (gen_casesi_jump_1 (reg2, operands[3]));
+ /* For SH2 and newer, the ADDR_DIFF_VEC is not actually relative to
+ operands[3], but to lab. We will fix this up in
+ machine_dependent_reorg. */
+ emit_barrier ();
+ DONE;
+})
+
+(define_expand "casesi_0"
+ [(set (match_operand:SI 4 "" "") (match_operand:SI 0 "arith_reg_operand" ""))
+ (set (match_dup 4) (minus:SI (match_dup 4)
+ (match_operand:SI 1 "arith_operand" "")))
+ (set (reg:SI T_REG)
+ (gtu:SI (match_dup 4)
+ (match_operand:SI 2 "arith_reg_operand" "")))
+ (set (pc)
+ (if_then_else (ne (reg:SI T_REG)
+ (const_int 0))
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ "TARGET_SH1"
+ "")
+
+;; ??? reload might clobber r0 if we use it explicitly in the RTL before
+;; reload; using a R0_REGS pseudo reg is likely to give poor code.
+;; So we keep the use of r0 hidden in a R0_REGS clobber until after reload.
+(define_insn "casesi_worker_0"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 "=X,1"))
+ (clobber (match_scratch:SI 4 "=&z,z"))]
+ "TARGET_SH1"
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_SH1 && ! TARGET_SH2 && reload_completed"
+ [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA))
+ (parallel [(set (match_dup 0)
+ (unspec:SI [(reg:SI R0_REG) (match_dup 1)
+ (label_ref (match_dup 2))] UNSPEC_CASESI))
+ (clobber (match_dup 3))])
+ (set (match_dup 0) (plus:SI (match_dup 0) (reg:SI R0_REG)))]
+{
+ if (GET_CODE (operands[2]) == CODE_LABEL)
+ LABEL_NUSES (operands[2])++;
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 4 ""))]
+ "TARGET_SH2 && reload_completed"
+ [(set (reg:SI R0_REG) (unspec:SI [(label_ref (match_dup 2))] UNSPEC_MOVA))
+ (parallel [(set (match_dup 0)
+ (unspec:SI [(reg:SI R0_REG) (match_dup 1)
+ (label_ref (match_dup 2))] UNSPEC_CASESI))
+ (clobber (match_dup 3))])]
+{
+ if (GET_CODE (operands[2]) == CODE_LABEL)
+ LABEL_NUSES (operands[2])++;
+})
+
+(define_insn "casesi_worker_1"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(reg:SI R0_REG)
+ (match_operand:SI 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))] UNSPEC_CASESI))
+ (clobber (match_scratch:SI 3 "=X,1"))]
+ "TARGET_SH1"
+{
+ rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return "shll2 %1" "\n"
+ " mov.l @(r0,%1),%0";
+ case HImode:
+ return "add %1,%1" "\n"
+ " mov.w @(r0,%1),%0";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return "mov.b @(r0,%1),%0" "\n"
+ " extu.b %0,%0";
+ else
+ return "mov.b @(r0,%1),%0";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "length" "4")])
+
+(define_insn "casesi_worker_2"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(reg:SI R0_REG)
+ (match_operand:SI 1 "register_operand" "0,r")
+ (label_ref (match_operand 2 "" ""))
+ (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI))
+ (clobber (match_operand:SI 4 "" "=X,1"))]
+ "TARGET_SH2 && reload_completed && flag_pic"
+{
+ rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return "shll2 %1" "\n"
+ " add r0,%1" "\n"
+ " mova %O3,r0" "\n"
+ " mov.l @(r0,%1),%0";
+ case HImode:
+ return "add %1,%1" "\n"
+ " add r0,%1" "\n"
+ " mova %O3,r0" "\n"
+ " mov.w @(r0,%1),%0";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return "add r0,%1" "\n"
+ " mova %O3,r0" "\n"
+ " mov.b @(r0,%1),%0" "\n"
+ " extu.b %0,%0";
+ else
+ return "add r0,%1" "\n"
+ " mova %O3,r0" "\n"
+ " mov.b @(r0,%1),%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "length" "8")])
+
+(define_insn "casesi_shift_media"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ashift:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (unspec:DI [(label_ref:DI (match_operand 2 "" ""))]
+ UNSPEC_CASESI)))]
+ "TARGET_SHMEDIA"
+{
+ rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return "shlli %1, 2, %0";
+ case HImode:
+ return "shlli %1, 1, %0";
+ case QImode:
+ if (rtx_equal_p (operands[0], operands[1]))
+ return "";
+ return "add %1, r63, %0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "arith_media")])
+
+(define_insn "casesi_load_media"
+ [(set (match_operand 0 "any_arith_reg_dest" "=r")
+ (mem (unspec [(match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")
+ (label_ref:DI (match_operand 3 "" ""))] UNSPEC_CASESI)))]
+ "TARGET_SHMEDIA"
+{
+ rtx diff_vec = PATTERN (NEXT_INSN (operands[3]));
+
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return "ldx.l %1, %2, %0";
+ case HImode:
+#if 0
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return "ldx.uw %1, %2, %0";
+#endif
+ return "ldx.w %1, %2, %0";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return "ldx.ub %1, %2, %0";
+ return "ldx.b %1, %2, %0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "load_media")])
+
+(define_expand "simple_return"
+ [(simple_return)]
+ "sh_can_use_simple_return_p ()")
+
+(define_expand "return"
+ [(return)]
+ "reload_completed && epilogue_completed"
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_jump_insn (gen_return_media ());
+ DONE;
+ }
+
+ if (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1)))
+ {
+ emit_jump_insn (gen_shcompact_return_tramp ());
+ DONE;
+ }
+})
+
+(define_insn "*<code>_i"
+ [(any_return)]
+ "TARGET_SH1 && ! (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie
+ & CALL_COOKIE_RET_TRAMP (1)))
+ && reload_completed
+ && ! sh_cfun_trap_exit_p ()"
+{
+ if (TARGET_SH2A && (dbr_sequence_length () == 0)
+ && !current_function_interrupt)
+ return "rts/n";
+ else
+ return "%@ %#";
+}
+ [(set_attr "type" "return")
+ (set_attr "needs_delay_slot" "yes")])
+
+;; trapa has no delay slot.
+(define_insn "*return_trapa"
+ [(return)]
+ "TARGET_SH1 && !TARGET_SHCOMPACT
+ && reload_completed"
+ "%@"
+ [(set_attr "type" "return")])
+
+(define_expand "shcompact_return_tramp"
+ [(return)]
+ "TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))"
+{
+ rtx reg = gen_rtx_REG (Pmode, R0_REG);
+
+ function_symbol (reg, "__GCC_shcompact_return_trampoline", SFUNC_STATIC);
+ emit_jump_insn (gen_shcompact_return_tramp_i ());
+ DONE;
+})
+
+(define_insn "shcompact_return_tramp_i"
+ [(parallel [(return) (use (reg:SI R0_REG))])]
+ "TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))"
+ "jmp @r0%#"
+ [(set_attr "type" "jump_ind")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "return_media_i"
+ [(parallel [(return) (use (match_operand 0 "target_reg_operand" "k"))])]
+ "TARGET_SHMEDIA && reload_completed"
+ "blink %0, r63"
+ [(set_attr "type" "jump_media")])
+
+(define_insn "return_media_rte"
+ [(return)]
+ "TARGET_SHMEDIA && reload_completed && current_function_interrupt"
+ "rte"
+ [(set_attr "type" "jump_media")])
+
+(define_expand "return_media"
+ [(return)]
+ "TARGET_SHMEDIA && reload_completed"
+{
+ int tr_regno = sh_media_register_for_return ();
+ rtx tr;
+
+ if (current_function_interrupt)
+ {
+ emit_jump_insn (gen_return_media_rte ());
+ DONE;
+ }
+ if (tr_regno < 0)
+ {
+ rtx r18 = gen_rtx_REG (Pmode, PR_MEDIA_REG);
+
+ gcc_assert (call_really_used_regs[TR0_REG] && !fixed_regs[TR0_REG]);
+ tr_regno = TR0_REG;
+ tr = gen_rtx_REG (Pmode, tr_regno);
+ emit_move_insn (tr, r18);
+ }
+ else
+ tr = gen_rtx_REG (Pmode, tr_regno);
+
+ emit_jump_insn (gen_return_media_i (tr));
+ DONE;
+})
+
+(define_insn "shcompact_preserve_incoming_args"
+ [(set (match_operand:SI 0 "register_operand" "+r")
+ (unspec:SI [(match_dup 0)] UNSPEC_COMPACT_ARGS))]
+ "TARGET_SHCOMPACT"
+ ""
+ [(set_attr "length" "0")])
+
+(define_insn "shcompact_incoming_args"
+ [(set (reg:SI R2_REG) (unspec:SI [(reg:SI R2_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R3_REG) (unspec:SI [(reg:SI R3_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R4_REG) (unspec:SI [(reg:SI R4_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R5_REG) (unspec:SI [(reg:SI R5_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R6_REG) (unspec:SI [(reg:SI R6_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R7_REG) (unspec:SI [(reg:SI R7_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R8_REG) (unspec:SI [(reg:SI R8_REG)] UNSPEC_COMPACT_ARGS))
+ (set (reg:SI R9_REG) (unspec:SI [(reg:SI R9_REG)] UNSPEC_COMPACT_ARGS))
+ (set (mem:BLK (reg:SI MACL_REG))
+ (unspec:BLK [(reg:SI MACH_REG)] UNSPEC_COMPACT_ARGS))
+ (use (reg:SI R0_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI MACL_REG))
+ (clobber (reg:SI MACH_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT"
+ "jsr @r0%#"
+ [(set_attr "needs_delay_slot" "yes")])
+
+(define_insn "shmedia_save_restore_regs_compact"
+ [(set (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand:SI 0 "immediate_operand" "i")))
+ (use (reg:SI R0_REG))
+ (clobber (reg:SI PR_REG))]
+ "TARGET_SHCOMPACT
+ && (INTVAL (operands[0]) == SHMEDIA_REGS_STACK_ADJUST ()
+ || INTVAL (operands[0]) == - SHMEDIA_REGS_STACK_ADJUST ())"
+ "jsr @r0%#"
+ [(set_attr "needs_delay_slot" "yes")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+{
+ sh_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+{
+ sh_expand_epilogue (false);
+ if (TARGET_SHMEDIA
+ || (TARGET_SHCOMPACT
+ && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
+ {
+ emit_jump_insn (gen_return ());
+ DONE;
+ }
+})
+
+(define_expand "eh_return"
+ [(use (match_operand 0 "register_operand" ""))]
+ ""
+{
+ rtx ra = operands[0];
+
+ if (TARGET_SHMEDIA64)
+ emit_insn (gen_eh_set_ra_di (ra));
+ else
+ emit_insn (gen_eh_set_ra_si (ra));
+
+ DONE;
+})
+
+;; Clobber the return address on the stack. We can't expand this
+;; until we know where it will be put in the stack frame.
+
+(define_insn "eh_set_ra_si"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")]
+ UNSPECV_EH_RETURN)
+ (clobber (match_scratch:SI 1 "=&r"))]
+ "! TARGET_SHMEDIA64"
+ "#")
+
+(define_insn "eh_set_ra_di"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r")]
+ UNSPECV_EH_RETURN)
+ (clobber (match_scratch:DI 1 "=&r"))]
+ "TARGET_SHMEDIA64"
+ "#")
+
+(define_split
+ [(unspec_volatile [(match_operand 0 "register_operand" "")]
+ UNSPECV_EH_RETURN)
+ (clobber (match_scratch 1 ""))]
+ "reload_completed"
+ [(const_int 0)]
+{
+ sh_set_return_address (operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "blockage"
+ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+ ""
+ ""
+ [(set_attr "length" "0")])
+
+;; Define movml instructions for the SH2A target. Currently they are
+;; used only to push and pop all banked registers.
+
+(define_insn "movml_push_banked"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus (match_dup 0) (const_int -32)))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 28))) (reg:SI R7_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 24))) (reg:SI R6_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 20))) (reg:SI R5_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 16))) (reg:SI R4_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 12))) (reg:SI R3_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 8))) (reg:SI R2_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 4))) (reg:SI R1_REG))
+ (set (mem:SI (plus:SI (match_dup 0) (const_int 0))) (reg:SI R0_REG))]
+ "TARGET_SH2A && REGNO (operands[0]) == 15"
+ "movml.l r7,@-r15"
+ [(set_attr "in_delay_slot" "no")])
+
+(define_insn "movml_pop_banked"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus (match_dup 0) (const_int 32)))
+ (set (reg:SI R0_REG) (mem:SI (plus:SI (match_dup 0) (const_int -32))))
+ (set (reg:SI R1_REG) (mem:SI (plus:SI (match_dup 0) (const_int -28))))
+ (set (reg:SI R2_REG) (mem:SI (plus:SI (match_dup 0) (const_int -24))))
+ (set (reg:SI R3_REG) (mem:SI (plus:SI (match_dup 0) (const_int -20))))
+ (set (reg:SI R4_REG) (mem:SI (plus:SI (match_dup 0) (const_int -16))))
+ (set (reg:SI R5_REG) (mem:SI (plus:SI (match_dup 0) (const_int -12))))
+ (set (reg:SI R6_REG) (mem:SI (plus:SI (match_dup 0) (const_int -8))))
+ (set (reg:SI R7_REG) (mem:SI (plus:SI (match_dup 0) (const_int -4))))]
+ "TARGET_SH2A && REGNO (operands[0]) == 15"
+ "movml.l @r15+,r7"
+ [(set_attr "in_delay_slot" "no")])
+
+;; ------------------------------------------------------------------------
+;; Scc instructions
+;; ------------------------------------------------------------------------
+
+(define_insn "movt"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (match_operand:SI 1 "t_reg_operand"))]
+ "TARGET_SH1"
+ "movt %0"
+ [(set_attr "type" "arith")])
+
+(define_insn "movrt"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))]
+ "TARGET_SH2A"
+ "movrt %0"
+ [(set_attr "type" "arith")])
+
+(define_expand "cstore4_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "sh_float_comparison_operator"
+ [(match_operand 2 "logical_operand" "")
+ (match_operand 3 "cmp_operand" "")]))]
+ "TARGET_SHMEDIA"
+{
+ enum machine_mode mode = GET_MODE (operands[2]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ bool invert, swap;
+ if (mode == VOIDmode)
+ mode = GET_MODE (operands[3]);
+ if (operands[2] == const0_rtx)
+ {
+ if (code == EQ || code == NE)
+ operands[2] = operands[3], operands[3] = const0_rtx;
+ }
+ else
+ operands[2] = force_reg (mode, operands[2]);
+ if (operands[3] != const0_rtx)
+ operands[3] = force_reg (mode, operands[3]);
+
+ switch (code)
+ {
+ case GEU:
+ case GE:
+ swap = invert = !FLOAT_MODE_P (mode);
+ break;
+
+ case LEU:
+ case LE:
+ swap = FLOAT_MODE_P (mode), invert = !swap;
+ break;
+
+ case LTU:
+ case LT:
+ swap = true, invert = false;
+ break;
+
+ case GTU:
+ case GT:
+ case EQ:
+ case UNORDERED:
+ swap = invert = false;
+ break;
+
+ case NE:
+ swap = invert = true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (swap)
+ {
+ rtx tem = operands[2];
+ operands[2] = operands[3];
+ operands[3] = tem;
+ code = swap_condition (code);
+ }
+
+ if (invert)
+ {
+ rtx tem = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+ code = reverse_condition (code);
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]);
+ emit_insn (gen_cstore4_media (tem, operands[1],
+ operands[2], operands[3]));
+ code = EQ;
+ operands[2] = tem;
+ operands[3] = const0_rtx;
+ }
+
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, operands[2], operands[3]);
+})
+
+(define_expand "cstoresi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:SI 2 "cmpsi_operand" "")
+ (match_operand:SI 3 "arith_operand" "")]))]
+ "TARGET_SH1 || TARGET_SHMEDIA"
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (sh_expand_t_scc (operands))
+ DONE;
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, SImode);
+ DONE;
+})
+
+(define_expand "cstoredi4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "comparison_operator"
+ [(match_operand:DI 2 "arith_operand" "")
+ (match_operand:DI 3 "arith_operand" "")]))]
+ "TARGET_SH2 || TARGET_SHMEDIA"
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (sh_expand_t_scc (operands))
+ DONE;
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, DImode);
+ DONE;
+})
+
+;; Move the complement of the T reg to a reg.
+;; On SH2A the movrt insn can be used.
+;; On anything other than SH2A this has to be done with multiple instructions.
+;; One obvious way would be:
+;; cmp/eq ...
+;; movt r0
+;; xor #1,r0
+;;
+;; However, this puts pressure on r0 in most cases and thus the following is
+;; more appealing:
+;; cmp/eq ...
+;; mov #-1,temp
+;; negc temp,dest
+;;
+;; If the constant -1 can be CSE-ed or lifted out of a loop it effectively
+;; becomes a one instruction operation. Moreover, care must be taken that
+;; the insn can still be combined with inverted compare and branch code
+;; around it. On the other hand, if a function returns the complement of
+;; a previous comparison result in the T bit, the xor #1,r0 approach might
+;; lead to better code.
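+;;
+;; Worked through for the negc variant (a sketch, using only the SH
+;; definition negc Rm,Rn: Rn = 0 - Rm - T):
+;;    temp = -1
+;;    dest = 0 - (-1) - T = 1 - T
+;; i.e. dest is 1 when T = 0 and 0 when T = 1, the complement of T.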
+(define_expand "movnegt"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))]
+ "TARGET_SH1"
+{
+ if (TARGET_SH2A)
+ emit_insn (gen_movrt (operands[0], operands[1]));
+ else
+ {
+ rtx val = force_reg (SImode, gen_int_mode (-1, SImode));
+ emit_insn (gen_movrt_negc (operands[0], operands[1], val));
+ }
+ DONE;
+})
+
+(define_insn "movrt_negc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))
+ (set (reg:SI T_REG) (const_int 1))
+ (use (match_operand:SI 2 "arith_reg_operand" "r"))]
+ "TARGET_SH1"
+ "negc %2,%0"
+ [(set_attr "type" "arith")])
+
+;; The -1 constant will not be CSE-ed for the *movrt_negc pattern, but the
+;; pattern can be used by the combine pass. Using a scratch reg for the
+;; -1 constant results in slightly better register allocations compared to
+;; generating a pseudo reg before reload.
+(define_insn_and_split "*movrt_negc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (xor:SI (match_operand:SI 1 "t_reg_operand" "") (const_int 1)))
+ (clobber (match_scratch:SI 2 "=r"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && ! TARGET_SH2A"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (const_int -1))
+ (parallel
+ [(set (match_dup 0) (xor:SI (match_dup 1) (const_int 1)))
+ (set (reg:SI T_REG) (const_int 1))
+ (use (match_dup 2))])])
+
+;; Store the negated T bit in a reg using r0 and xor. This one doesn't
+;; clobber the T bit, which is useful when storing the T bit and the
+;; negated T bit in parallel. On SH2A the movrt insn can be used for that.
+;; Usually we don't want this insn to be matched, except for cases where the
+;; T bit clobber is really not appreciated. Hence the extra use on T_REG.
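+;; The split below amounts to "movt r0; xor #1,r0" (a sketch); the immediate
+;; form of xor only operates on r0, which is why the destination uses the
+;; "z" constraint.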
+(define_insn_and_split "movrt_xor"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=z")
+ (xor:SI (match_operand:SI 1 "t_reg_operand") (const_int 1)))
+ (use (reg:SI T_REG))]
+ "TARGET_SH1 && !TARGET_SH2A"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (reg:SI T_REG))
+ (set (match_dup 0) (xor:SI (match_dup 0) (const_int 1)))])
+
+;; Store the T bit and the negated T bit in two regs in parallel. There is
+;; no real insn to do that, but specifying this pattern will give combine
+;; some opportunities.
+(define_insn_and_split "*movt_movrt"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_dest")
+ (match_operand:SI 1 "negt_reg_operand"))
+ (set (match_operand:SI 2 "arith_reg_dest")
+ (match_operand:SI 3 "t_reg_operand"))])]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx i = TARGET_SH2A
+ ? gen_movrt (operands[0], get_t_reg_rtx ())
+ : gen_movrt_xor (operands[0], get_t_reg_rtx ());
+
+ emit_insn (i);
+ emit_insn (gen_movt (operands[2], get_t_reg_rtx ()));
+ DONE;
+})
+
+(define_insn_and_split "*movt_movrt"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_dest")
+ (match_operand:SI 1 "t_reg_operand"))
+ (set (match_operand:SI 2 "arith_reg_dest")
+ (match_operand:SI 3 "negt_reg_operand"))])]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 0) (match_dup 1))])])
+
+;; Use negc to store the T bit in a MSB of a reg in the following way:
+;; T = 0: 0x80000000 -> reg
+;; T = 1: 0x7FFFFFFF -> reg
+;; This works because 0 - 0x80000000 = 0x80000000.
+;;
+;; This insn must not match again after it has been split into the constant
+;; load and negc. This is accomplished by the special negc insn that
+;; has a use on the operand.
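+;;
+;; Worked through for both T values (a sketch; negc Rm,Rn computes
+;; Rn = 0 - Rm - T, with the constant 0x80000000 loaded into Rm):
+;;    T = 0:  0 - 0x80000000 - 0 = 0x80000000
+;;    T = 1:  0 - 0x80000000 - 1 = 0x7FFFFFFF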
+(define_insn_and_split "*mov_t_msb_neg"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (minus:SI (const_int -2147483648) ;; 0x80000000
+ (match_operand 1 "t_reg_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(set (match_dup 2) (const_int -2147483648))
+ (parallel [(set (match_dup 0) (minus:SI (neg:SI (match_dup 2))
+ (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))
+ (use (match_dup 2))])]
+{
+ operands[2] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*mov_t_msb_neg_negc"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (minus:SI (neg:SI (match_operand:SI 1 "arith_reg_operand" "r"))
+ (match_operand:SI 2 "t_reg_operand")))
+ (clobber (reg:SI T_REG))
+ (use (match_dup 1))]
+ "TARGET_SH1"
+ "negc %1,%0"
+ [(set_attr "type" "arith")])
+
+;; These are essentially the same as above, but with the inverted T bit.
+;; Combine recognizes the split patterns, but sometimes does not use them
+;; if the T_REG clobber is specified. Instead it tries to split out the
+;; T bit negation. Since these splits are supposed to be taken only by
+;; combine, it will see the T_REG clobber of the *mov_t_msb_neg insn, so this
+;; should be fine.
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (match_operand 1 "negt_reg_operand")
+ (const_int 2147483647)))] ;; 0x7fffffff
+ "TARGET_SH1 && can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (minus:SI (const_int -2147483648) (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])])
+
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (if_then_else:SI (match_operand 1 "t_reg_operand")
+ (const_int 2147483647) ;; 0x7fffffff
+ (const_int -2147483648)))] ;; 0x80000000
+ "TARGET_SH1 && can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (minus:SI (const_int -2147483648) (reg:SI T_REG)))
+ (clobber (reg:SI T_REG))])])
+
+;; The *negnegt pattern helps the combine pass to figure out how to fold
+;; an explicit double T bit negation.
+(define_insn_and_split "*negnegt"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand 0 "negt_reg_operand" "") (const_int 0)))]
+ "TARGET_SH1"
+ "#"
+ ""
+ [(const_int 0)])
+
+;; Store T bit as all zeros or ones in a reg.
+(define_insn "mov_neg_si_t"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (neg:SI (match_operand 1 "t_reg_operand" "")))]
+ "TARGET_SH1"
+ "subc %0,%0"
+ [(set_attr "type" "arith")])
+
+;; Store negated T bit as all zeros or ones in a reg.
+;; Use the following sequence:
+;; subc Rn,Rn ! Rn = Rn - Rn - T; T = T
+;; not Rn,Rn ! Rn = ~Rn
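+;;
+;; Worked through for both T values (a sketch; subc Rn,Rn leaves -T in Rn):
+;;    T = 1:  Rn = 0 - 1 = 0xFFFFFFFF, then not -> 0x00000000
+;;    T = 0:  Rn = 0 - 0 = 0x00000000, then not -> 0xFFFFFFFF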
+(define_split
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (neg:SI (match_operand 1 "negt_reg_operand" "")))]
+ "TARGET_SH1"
+ [(set (match_dup 0) (neg:SI (reg:SI T_REG)))
+ (set (match_dup 0) (not:SI (match_dup 0)))])
+
+;; The *movtt pattern eliminates redundant T bit to T bit moves / tests.
+(define_insn_and_split "*movtt"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand 0 "t_reg_operand" "") (const_int 1)))]
+ "TARGET_SH1"
+ "#"
+ ""
+ [(const_int 0)])
+
+;; Invert the T bit.
+;; On SH2A we can use the nott insn. On anything else this must be done with
+;; multiple insns like:
+;; movt Rn
+;; tst Rn,Rn
+;; This requires an additional pseudo. The SH specific sh_treg_combine RTL
+;; pass will look for this insn. Disallow using it if pseudos can't be
+;; created.
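+;;
+;; Worked through (a sketch): movt Rn copies T into Rn, and tst Rn,Rn sets
+;; T = (Rn == 0), so the final T is the complement of the original T.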
+(define_insn_and_split "nott"
+ [(set (reg:SI T_REG)
+ (xor:SI (match_operand:SI 0 "t_reg_operand") (const_int 1)))]
+ "TARGET_SH2A || (TARGET_SH1 && can_create_pseudo_p ())"
+{
+ gcc_assert (TARGET_SH2A);
+ return "nott";
+}
+ "! TARGET_SH2A && can_create_pseudo_p ()"
+ [(set (match_dup 0) (reg:SI T_REG))
+ (set (reg:SI T_REG) (eq:SI (match_dup 0) (const_int 0)))]
+{
+ operands[0] = gen_reg_rtx (SImode);
+})
+
+;; Store T bit as MSB in a reg.
+;; T = 0: 0x00000000 -> reg
+;; T = 1: 0x80000000 -> reg
+(define_insn_and_split "*movt_msb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (mult:SI (match_operand:SI 1 "t_reg_operand")
+ (const_int -2147483648))) ;; 0xffffffff80000000
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (ashift:SI (reg:SI T_REG) (const_int 31)))])
+
+;; Store inverted T bit as MSB in a reg.
+;; T = 0: 0x80000000 -> reg
+;; T = 1: 0x00000000 -> reg
+;; On SH2A we can get away without clobbering the T_REG.
+(define_insn_and_split "*negt_msb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (match_operand:SI 1 "negt_reg_shl31_operand"))]
+ "TARGET_SH2A"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_movrt (tmp, get_t_reg_rtx ()));
+ emit_insn (gen_rotrsi3 (operands[0], tmp, const1_rtx));
+ DONE;
+})
+
+(define_insn_and_split "*negt_msb"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (match_operand:SI 1 "negt_reg_shl31_operand"))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH1 && !TARGET_SH2A"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_move_insn (tmp, get_t_reg_rtx ());
+ emit_insn (gen_cmpeqsi_t (tmp, const0_rtx));
+ emit_insn (gen_rotcr (operands[0], tmp, get_t_reg_rtx ()));
+ DONE;
+})
+
+;; The *cset_zero patterns convert optimizations such as
+;; "if (test) x = 0;"
+;; to
+;; "x &= -(test == 0);"
+;; back to conditional branch sequences if zero-displacement branches
+;; are enabled.
+;; FIXME: These patterns can be removed when conditional execution patterns
+;; are implemented, since ifcvt will not perform these optimizations if
+;; conditional execution is supported.
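+;;
+;; Worked through (a sketch): -(test == 0) is all ones when the test is false
+;; and all zeros when it is true, so the AND either keeps x unchanged or
+;; clears it; with zero-displacement branches the conditional-branch form
+;; "bf/bt over a mov #0" below is the cheaper encoding.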
+(define_insn "*cset_zero"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (and:SI (plus:SI (match_operand:SI 1 "t_reg_operand")
+ (const_int -1))
+ (match_operand:SI 2 "arith_reg_operand" "0")))]
+ "TARGET_SH1 && TARGET_ZDCBRANCH"
+{
+ return "bf 0f" "\n"
+ " mov #0,%0" "\n"
+ "0:";
+}
+ [(set_attr "type" "arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_insn "*cset_zero"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (if_then_else:SI (match_operand:SI 1 "t_reg_operand")
+ (match_operand:SI 2 "arith_reg_operand" "0")
+ (const_int 0)))]
+ "TARGET_SH1 && TARGET_ZDCBRANCH"
+{
+ return "bt 0f" "\n"
+ " mov #0,%0" "\n"
+ "0:";
+}
+ [(set_attr "type" "arith") ;; poor approximation
+ (set_attr "length" "4")])
+
+(define_expand "cstoresf4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "sh_float_comparison_operator"
+ [(match_operand:SF 2 "arith_operand" "")
+ (match_operand:SF 3 "arith_operand" "")]))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, SFmode);
+ DONE;
+})
+
+(define_expand "cstoredf4"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (match_operator:SI 1 "sh_float_comparison_operator"
+ [(match_operand:DF 2 "arith_operand" "")
+ (match_operand:DF 3 "arith_operand" "")]))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SHMEDIA)
+ {
+ emit_insn (gen_cstore4_media (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+ }
+
+ if (! currently_expanding_to_rtl)
+ FAIL;
+
+ sh_emit_compare_and_set (operands, DFmode);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Instructions to cope with inline literal tables
+;; -------------------------------------------------------------------------
+
+;; 2 byte integer in line
+(define_insn "consttable_2"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST2)]
+ ""
+{
+ if (operands[1] != const0_rtx)
+ assemble_integer (operands[0], 2, BITS_PER_UNIT * 2, 1);
+ return "";
+}
+ [(set_attr "length" "2")
+ (set_attr "in_delay_slot" "no")])
+
+;; 4 byte integer in line
+(define_insn "consttable_4"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST4)]
+ ""
+{
+ if (operands[1] != const0_rtx)
+ {
+ assemble_integer (operands[0], 4, BITS_PER_UNIT * 4, 1);
+ mark_symbol_refs_as_used (operands[0]);
+ }
+ return "";
+}
+ [(set_attr "length" "4")
+ (set_attr "in_delay_slot" "no")])
+
+;; 8 byte integer in line
+(define_insn "consttable_8"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST8)]
+ ""
+{
+ if (operands[1] != const0_rtx)
+ assemble_integer (operands[0], 8, BITS_PER_UNIT * 8, 1);
+ return "";
+}
+ [(set_attr "length" "8")
+ (set_attr "in_delay_slot" "no")])
+
+;; 4 byte floating point
+(define_insn "consttable_sf"
+ [(unspec_volatile [(match_operand:SF 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST4)]
+ ""
+{
+ if (operands[1] != const0_rtx)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]);
+ assemble_real (d, SFmode, GET_MODE_ALIGNMENT (SFmode));
+ }
+ return "";
+}
+ [(set_attr "length" "4")
+ (set_attr "in_delay_slot" "no")])
+
+;; 8 byte floating point
+(define_insn "consttable_df"
+ [(unspec_volatile [(match_operand:DF 0 "general_operand" "=g")
+ (match_operand 1 "" "")]
+ UNSPECV_CONST8)]
+ ""
+{
+ if (operands[1] != const0_rtx)
+ {
+ REAL_VALUE_TYPE d;
+ REAL_VALUE_FROM_CONST_DOUBLE (d, operands[0]);
+ assemble_real (d, DFmode, GET_MODE_ALIGNMENT (DFmode));
+ }
+ return "";
+}
+ [(set_attr "length" "8")
+ (set_attr "in_delay_slot" "no")])
+
+;; Alignment is needed for some constant tables; it may also be added for
+;; instructions at the start of loops, or after unconditional branches.
+;; ??? We would get more accurate lengths if we did instruction
+;; alignment based on the value of INSN_CURRENT_ADDRESS; the approach used
+;; here is too conservative.
+
+;; align to a two byte boundary
+(define_expand "align_2"
+ [(unspec_volatile [(const_int 1)] UNSPECV_ALIGN)]
+ ""
+ "")
+
+;; Align to a four byte boundary.
+;; align_4 and align_log are instructions for the starts of loops, or
+;; after unconditional branches, which may take up extra room.
+(define_expand "align_4"
+ [(unspec_volatile [(const_int 2)] UNSPECV_ALIGN)]
+ ""
+ "")
+
+;; Align to a cache line boundary.
+(define_insn "align_log"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "")] UNSPECV_ALIGN)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "in_delay_slot" "no")])
+
+;; Emitted at the end of the literal table, used to emit the
+;; 32-bit branch labels if needed.
+(define_insn "consttable_end"
+ [(unspec_volatile [(const_int 0)] UNSPECV_CONST_END)]
+ ""
+{
+ return output_jump_label_table ();
+}
+ [(set_attr "in_delay_slot" "no")])
+
+;; Emitted at the end of the window in the literal table.
+(define_insn "consttable_window_end"
+ [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_WINDOW_END)]
+ ""
+ ""
+ [(set_attr "length" "0")
+ (set_attr "in_delay_slot" "no")])
+
+;; -------------------------------------------------------------------------
+;; Minimum / maximum operations.
+;; -------------------------------------------------------------------------
+
+;; The SH2A clips.b and clips.w insns do a signed min-max function. If smin
+;; and smax standard name patterns are defined, they will be used during
+;; initial expansion and combine will then be able to form the actual min-max
+;; pattern.
+;; The clips.b and clips.w set the SR.CS bit if the value in the register is
+;; clipped, but there is currently no way of making use of this information.
+;; The only way to read or reset the SR.CS bit is by accessing the SR.
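+;; As a rough C illustration only (not code from this port), clips.b behaves
+;; like clamping to [-128, 127], and clips.w like clamping to [-32768, 32767]:
+;;   static int clamp_b (int x) { return x < -128 ? -128 : x > 127 ? 127 : x; }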
+(define_expand "<code>si3"
+ [(parallel [(set (match_operand:SI 0 "arith_reg_dest")
+ (SMIN_SMAX:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (reg:SI T_REG))])]
+ "TARGET_SH2A"
+{
+ /* Force the comparison value into a register, because greater-than
+ comparisons can work only on registers. Combine will be able to pick up
+ the constant value from the REG_EQUAL note when trying to form a min-max
+ pattern. */
+ operands[2] = force_reg (SImode, operands[2]);
+})
+
+;; Convert
+;; smax (smin (...))
+;; to
+;; smin (smax (...))
+(define_insn_and_split "*clips"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (smax:SI (smin:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand 2 "clips_max_const_int"))
+ (match_operand 3 "clips_min_const_int")))]
+ "TARGET_SH2A"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (smin:SI (smax:SI (match_dup 1) (match_dup 3)) (match_dup 2)))])
+
+(define_insn "*clips"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (smin:SI (smax:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand 2 "clips_min_const_int"))
+ (match_operand 3 "clips_max_const_int")))]
+ "TARGET_SH2A"
+{
+ if (INTVAL (operands[3]) == 127)
+ return "clips.b %0";
+ else if (INTVAL (operands[3]) == 32767)
+ return "clips.w %0";
+ else
+ gcc_unreachable ();
+}
+ [(set_attr "type" "arith")])
+
+;; If the expanded smin or smax patterns were not combined, split them into
+;; a compare and branch sequence, because there are no real smin or smax
+;; insns.
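+;; Roughly, the split below implements (illustrative C only; smax is analogous
+;; with the branch sense inverted):
+;;   static int smin (int a, int b) { int r = a; if (r > b) r = b; return r; }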
+(define_insn_and_split "*<code>si3"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (SMIN_SMAX:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "arith_reg_or_0_or_1_operand")))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH2A && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx skip_label = gen_label_rtx ();
+ emit_move_insn (operands[0], operands[1]);
+
+ rtx cmp_val = operands[2];
+ if (satisfies_constraint_M (cmp_val))
+ cmp_val = const0_rtx;
+
+ emit_insn (gen_cmpgtsi_t (operands[0], cmp_val));
+ emit_jump_insn (<CODE> == SMIN
+ ? gen_branch_false (skip_label)
+ : gen_branch_true (skip_label));
+
+ emit_label_after (skip_label, emit_move_insn (operands[0], operands[2]));
+ DONE;
+})
+
+;; The SH2A clipu.b and clipu.w insns can be used to implement a min function
+;; with a register and a constant.
+;; The clipu.b and clipu.w set the SR.CS bit if the value in the register is
+;; clipped, but there is currently no way of making use of this information.
+;; The only way to read or reset the SR.CS bit is by accessing the SR.
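+;; Illustrative C only: clipu.b corresponds to
+;;   static unsigned clamp_ub (unsigned x) { return x > 255 ? 255 : x; }
+;; and clipu.w is the same with a limit of 65535.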
+(define_expand "uminsi3"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (umin:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_SH2A"
+{
+ if (INTVAL (operands[2]) == 1)
+ {
+ emit_insn (gen_clipu_one (operands[0], operands[1]));
+ DONE;
+ }
+ else if (! clipu_max_const_int (operands[2], VOIDmode))
+ FAIL;
+})
+
+(define_insn "*clipu"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (umin:SI (match_operand:SI 1 "arith_reg_operand" "0")
+ (match_operand 2 "clipu_max_const_int")))]
+ "TARGET_SH2A"
+{
+ if (INTVAL (operands[2]) == 255)
+ return "clipu.b %0";
+ else if (INTVAL (operands[2]) == 65535)
+ return "clipu.w %0";
+ else
+ gcc_unreachable ();
+}
+ [(set_attr "type" "arith")])
+
+(define_insn_and_split "clipu_one"
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (umin:SI (match_operand:SI 1 "arith_reg_operand") (const_int 1)))
+ (clobber (reg:SI T_REG))]
+ "TARGET_SH2A"
+ "#"
+ "&& can_create_pseudo_p ()"
+ [(const_int 0)]
+{
+ emit_insn (gen_cmpeqsi_t (operands[1], const0_rtx));
+ emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Misc
+;; -------------------------------------------------------------------------
+
+;; String/block move insn.
+
+(define_expand "movmemsi"
+ [(parallel [(set (mem:BLK (match_operand:BLK 0))
+ (mem:BLK (match_operand:BLK 1)))
+ (use (match_operand:SI 2 "nonmemory_operand"))
+ (use (match_operand:SI 3 "immediate_operand"))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R0_REG))])]
+ "TARGET_SH1 && ! TARGET_SH5"
+{
+ if (expand_block_move (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_insn "block_move_real"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R0_REG))])]
+ "TARGET_SH1 && ! TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_lump_real"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (reg:SI R6_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R0_REG))])]
+ "TARGET_SH1 && ! TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_move_real_i4"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))])]
+ "TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+(define_insn "block_lump_real_i4"
+ [(parallel [(set (mem:BLK (reg:SI R4_REG))
+ (mem:BLK (reg:SI R5_REG)))
+ (use (match_operand:SI 0 "arith_reg_operand" "r"))
+ (use (reg:SI R6_REG))
+ (clobber (reg:SI PR_REG))
+ (clobber (reg:SI T_REG))
+ (clobber (reg:SI R4_REG))
+ (clobber (reg:SI R5_REG))
+ (clobber (reg:SI R6_REG))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (clobber (reg:SI R2_REG))
+ (clobber (reg:SI R3_REG))])]
+ "TARGET_HARD_SH4"
+ "jsr @%0%#"
+ [(set_attr "type" "sfunc")
+ (set_attr "needs_delay_slot" "yes")])
+
+;; byte compare pattern
+;; temp = a ^ b;
+;; !((temp & 0xFF000000) && (temp & 0x00FF0000)
+;;   && (temp & 0x0000FF00) && (temp & 0x000000FF))
+(define_insn "cmpstr_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (and:SI
+ (and:SI
+ (and:SI
+ (zero_extract:SI
+ (xor:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (match_operand:SI 1 "arith_reg_operand" "r"))
+ (const_int 8) (const_int 0))
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 8) (const_int 8)))
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 8) (const_int 16)))
+ (zero_extract:SI (xor:SI (match_dup 0) (match_dup 1))
+ (const_int 8) (const_int 24)))
+ (const_int 0)))]
+ "TARGET_SH1"
+ "cmp/str %0,%1"
+ [(set_attr "type" "mt_group")])
+
+(define_expand "cmpstrsi"
+ [(set (match_operand:SI 0 "register_operand")
+ (compare:SI (match_operand:BLK 1 "memory_operand")
+ (match_operand:BLK 2 "memory_operand")))
+ (use (match_operand 3 "immediate_operand"))]
+ "TARGET_SH1 && optimize"
+{
+ if (! optimize_insn_for_size_p () && sh_expand_cmpstr (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "cmpstrnsi"
+ [(set (match_operand:SI 0 "register_operand")
+ (compare:SI (match_operand:BLK 1 "memory_operand")
+ (match_operand:BLK 2 "memory_operand")))
+ (use (match_operand:SI 3 "immediate_operand"))
+ (use (match_operand:SI 4 "immediate_operand"))]
+ "TARGET_SH1 && optimize"
+{
+ if (! optimize_insn_for_size_p () && sh_expand_cmpnstr (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+(define_expand "strlensi"
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:BLK 1 "memory_operand")
+ (match_operand:SI 2 "immediate_operand")
+ (match_operand:SI 3 "immediate_operand")]
+ UNSPEC_BUILTIN_STRLEN))]
+ "TARGET_SH1 && optimize"
+{
+ if (! optimize_insn_for_size_p () && sh_expand_strlen (operands))
+ DONE;
+ else
+ FAIL;
+})
+
+
+;; -------------------------------------------------------------------------
+;; Floating point instructions.
+;; -------------------------------------------------------------------------
+
+;; ??? All patterns should have a type attribute.
+
+(define_expand "movpsi"
+ [(set (match_operand:PSI 0 "register_operand" "")
+ (match_operand:PSI 1 "general_movsrc_operand" ""))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "")
+
+;; The c / m alternative is a fake to guide reload to load directly into
+;; fpscr, since reload doesn't know how to use post-increment.
+;; TARGET_LEGITIMATE_ADDRESS_P guards against bogus addresses before reload,
+;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's
+;; predicate after reload.
+;; The mac_gp type for r/!c might look a bit odd, but it actually schedules
+;; like a mac -> gpr move.
+(define_insn "fpu_switch"
+ [(set (match_operand:PSI 0 "general_movdst_operand" "=c,c,r,c,c,r,m,r,<")
+ (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c,c"))]
+ "TARGET_SH2E
+ && (! reload_completed
+ || true_regnum (operands[0]) != FPSCR_REG
+ || !MEM_P (operands[1])
+ || GET_CODE (XEXP (operands[1], 0)) != PLUS)"
+ "@
+ ! precision stays the same
+ lds.l %1,fpscr
+ mov.l %1,%0
+ #
+ lds %1,fpscr
+ mov %1,%0
+ mov.l %1,%0
+ sts fpscr,%0
+ sts.l fpscr,%0"
+ [(set_attr "length" "0,2,2,4,2,2,2,2,2")
+ (set_attr "type" "nil,mem_fpscr,load,mem_fpscr,gp_fpscr,move,store,
+ mac_gp,fstore")])
+
+(define_peephole2
+ [(set (reg:PSI FPSCR_REG)
+ (mem:PSI (match_operand:SI 0 "register_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) && peep2_reg_dead_p (1, operands[0])"
+ [(const_int 0)]
+{
+ rtx fpscr, mem, new_insn;
+
+ fpscr = SET_DEST (PATTERN (curr_insn));
+ mem = SET_SRC (PATTERN (curr_insn));
+ mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0]));
+
+ new_insn = emit_insn (gen_fpu_switch (fpscr, mem));
+ add_reg_note (new_insn, REG_INC, operands[0]);
+ DONE;
+})
+
+(define_split
+ [(set (reg:PSI FPSCR_REG)
+ (mem:PSI (match_operand:SI 0 "register_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ && (flag_peephole2 ? epilogue_completed : reload_completed)"
+ [(const_int 0)]
+{
+ rtx fpscr, mem, new_insn;
+
+ fpscr = SET_DEST (PATTERN (curr_insn));
+ mem = SET_SRC (PATTERN (curr_insn));
+ mem = replace_equiv_address (mem, gen_rtx_POST_INC (Pmode, operands[0]));
+
+ new_insn = emit_insn (gen_fpu_switch (fpscr, mem));
+ add_reg_note (new_insn, REG_INC, operands[0]);
+
+ if (!find_regno_note (curr_insn, REG_DEAD, true_regnum (operands[0])))
+ emit_insn (gen_addsi3 (operands[0], operands[0], GEN_INT (-4)));
+ DONE;
+})
+
+;; ??? This uses the fp unit, but has no type indicating that.
+;; If we did that, this would either give a bogus latency or introduce
+;; a bogus FIFO constraint.
+;; Since this insn is currently only used for prologues/epilogues,
+;; it is probably best to claim no function unit, which matches the
+;; current setting.
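+;; (The constant 1048576 below is 1 << 20, the FPSCR.SZ bit; the toggle_pr
+;; pattern further down flips 524288 = 1 << 19, the FPSCR.PR bit.)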
+(define_insn "toggle_sz"
+ [(set (reg:PSI FPSCR_REG)
+ (xor:PSI (reg:PSI FPSCR_REG) (const_int 1048576)))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fschg"
+ [(set_attr "type" "fpscr_toggle") (set_attr "fp_set" "unknown")])
+
+;; There's no way we can use it today, since optimize mode switching
+;; doesn't tell us which mode we are switching from when it requests a
+;; mode, so we cannot tell whether we can use a relative mode switch
+;; (like toggle_pr) or an absolute switch (like loading fpscr from
+;; memory).
+(define_insn "toggle_pr"
+ [(set (reg:PSI FPSCR_REG)
+ (xor:PSI (reg:PSI FPSCR_REG) (const_int 524288)))]
+ "TARGET_SH4A_FP && ! TARGET_FPU_SINGLE"
+ "fpchg"
+ [(set_attr "type" "fpscr_toggle")])
+
+(define_expand "addsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand")
+ (plus:SF (match_operand:SF 1 "fp_arith_reg_operand")
+ (match_operand:SF 2 "fp_arith_reg_operand")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_binop (&gen_addsf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*addsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fadd.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn_and_split "unary_sf_op"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_select:V2SF
+ (vec_concat:V2SF
+ (vec_select:SF
+ (match_dup 0)
+ (parallel [(not:BI (match_operand 3 "const_int_operand" "n"))]))
+ (match_operator:SF 2 "unary_float_operator"
+ [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(match_operand 4
+ "const_int_operand" "n")]))]))
+ (parallel [(not:BI (match_dup 3)) (match_dup 3)])))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "TARGET_SHMEDIA_FPU && reload_completed"
+ [(set (match_dup 5) (match_dup 6))]
+{
+ int endian = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ rtx op1 = gen_rtx_REG (SFmode,
+ (true_regnum (operands[1])
+ + (INTVAL (operands[4]) ^ endian)));
+
+ operands[7] = gen_rtx_REG (SFmode,
+ (true_regnum (operands[0])
+ + (INTVAL (operands[3]) ^ endian)));
+ operands[6] = gen_rtx_fmt_e (GET_CODE (operands[2]), SFmode, op1);
+}
+ [(set_attr "type" "fparith_media")])
+
+(define_insn_and_split "binary_sf_op0"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (match_operator:SF 3 "binary_float_operator"
+ [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 0)]))
+ (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 0)]))])
+ (vec_select:SF
+ (match_dup 0)
+ (parallel [(const_int 1)]))))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4) (match_dup 5))]
+{
+ int endian = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ rtx op1 = gen_rtx_REG (SFmode,
+ true_regnum (operands[1]) + endian);
+ rtx op2 = gen_rtx_REG (SFmode,
+ true_regnum (operands[2]) + endian);
+
+ operands[4] = gen_rtx_REG (SFmode,
+ true_regnum (operands[0]) + endian);
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2);
+}
+ [(set_attr "type" "fparith_media")])
+
+(define_insn_and_split "binary_sf_op1"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (vec_select:SF
+ (match_dup 0)
+ (parallel [(const_int 0)]))
+ (match_operator:SF 3 "binary_float_operator"
+ [(vec_select:SF (match_operand:V2SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 1)]))
+ (vec_select:SF (match_operand:V2SF 2 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 1)]))])))]
+ "TARGET_SHMEDIA_FPU"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4) (match_dup 5))]
+{
+ int endian = TARGET_LITTLE_ENDIAN ? 0 : 1;
+ rtx op1 = gen_rtx_REG (SFmode, true_regnum (operands[1]) + (1 ^ endian));
+ rtx op2 = gen_rtx_REG (SFmode, true_regnum (operands[2]) + (1 ^ endian));
+
+ operands[4] = gen_rtx_REG (SFmode, true_regnum (operands[0]) + (1 ^ endian));
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SFmode, op1, op2);
+}
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "addsf3_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fadd %2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "subsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
+ (match_operand:SF 2 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_binop (&gen_subsf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*subsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsub.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "subsf3_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fsub %2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "mulsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
+ (match_operand:SF 2 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ emit_insn (gen_mulsf3_i (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*mulsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fmul.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "mulsf3_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fmul %2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+;; FMA (fused multiply-add) patterns
+(define_expand "fmasf4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand")
+ (fma:SF (match_operand:SF 1 "fp_arith_reg_operand")
+ (match_operand:SF 2 "fp_arith_reg_operand")
+ (match_operand:SF 3 "fp_arith_reg_operand")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ emit_sf_insn (gen_fmasf4_i (operands[0], operands[1], operands[2],
+ operands[3], get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "fmasf4_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "w")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")
+ (match_operand:SF 3 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 4 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fmac %1,%2,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "fmasf4_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")
+ (match_operand:SF 3 "fp_arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA_FPU"
+ "fmac.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+;; For some cases such as 'a * b + a' the FMA pattern is not generated by
+;; previous transformations. If FMA is generally allowed, let the combine
+;; pass utilize it.
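+;; For example (a sketch; the exact output depends on -ffp-contract and
+;; other options), combine can turn
+;;   float f (float a, float b) { return a * b + a; }
+;; into a single fmac when contraction is allowed.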
+(define_insn_and_split "*fmasf4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 4 "fpscr_operand"))]
+ "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
+ "fmac %1,%2,%0"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (fma:SF (match_dup 1) (match_dup 2) (match_dup 3)))
+ (use (match_dup 4))])]
+{
+ /* Change 'b * a + a' into 'a * b + a'.
+ This is better for register allocation. */
+ if (REGNO (operands[2]) == REGNO (operands[3]))
+ {
+ rtx tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*fmasf4_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "fp_arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF"
+ "fmac.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_expand "divsf3"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand")
+ (div:SF (match_operand:SF 1 "fp_arith_reg_operand")
+ (match_operand:SF 2 "fp_arith_reg_operand")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_binop (&gen_divsf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*divsf3_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (div:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fdiv.s %1, %2, %0"
+ [(set_attr "type" "fdiv_media")])
+
+(define_insn "divsf3_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_dest" "=f")
+ (div:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fdiv %2,%0"
+ [(set_attr "type" "fdiv")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "floatdisf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:DI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.qs %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_expand "floatsisf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (float:SF (match_operand:SI 1 "fpul_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
+ {
+ emit_sf_insn (gen_floatsisf2_i4 (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*floatsisf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:SI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.ls %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_insn "floatsisf2_i4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:SI 1 "fpul_operand" "y")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "float %1,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*floatsisf2_ie"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float:SF (match_operand:SI 1 "fpul_operand" "y")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "float %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "fix_truncsfdi2"
+ [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f")
+ (fix:DI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.sq %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_expand "fix_truncsfsi2"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_SINGLE)
+ {
+ emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*fix_truncsfsi2_media"
+ [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.sl %1, %0"
+ [(set_attr "type" "fpconv_media")])
+
+(define_insn "fix_truncsfsi2_i4"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "ftrc %1,%0"
+ [(set_attr "type" "ftrc_s")
+ (set_attr "fp_mode" "single")])
+
+;; ??? This pattern is used nowhere. fix_truncsfsi2 always expands to
+;; fix_truncsfsi2_i4.
+;; (define_insn "fix_truncsfsi2_i4_2"
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+;; (use (reg:PSI FPSCR_REG))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; "#"
+;; [(set_attr "length" "4")
+;; (set_attr "fp_mode" "single")])
+
+;;(define_split
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
+;; (use (match_operand:PSI 2 "fpscr_operand" "c"))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1)))
+;; (use (match_dup 2))])
+;; (set (match_dup 0) (reg:SI FPUL_REG))])
+
+(define_insn "*fixsfsi"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "ftrc %1,%0"
+ [(set_attr "type" "fp")])
+
+(define_insn "cmpgtsf_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "cmpeqsf_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SH2E && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "ieee_ccmpeqsf_t"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f"))))]
+ "TARGET_SH2E && TARGET_IEEE && ! (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+{
+ return output_ieee_ccmpeq (insn, operands);
+}
+ [(set_attr "length" "4")])
+
+
+(define_insn "cmpgtsf_t_i4"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "cmpeqsf_t_i4"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_SINGLE)"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "fp_cmp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*ieee_ccmpeqsf_t_4"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (eq:SI (match_operand:SF 0 "fp_arith_reg_operand" "f")
+ (match_operand:SF 1 "fp_arith_reg_operand" "f"))))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_SINGLE)"
+{
+ return output_ieee_ccmpeq (insn, operands);
+}
+ [(set_attr "length" "4")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "cmpeqsf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpeq.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgtsf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpgt.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgesf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ge:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpge.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpunsf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unordered:SI (match_operand:SF 1 "fp_arith_reg_operand" "f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpun.s %1, %2, %0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_expand "cbranchsf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "sh_float_comparison_operator"
+ [(match_operand:SF 1 "arith_operand" "")
+ (match_operand:SF 2 "arith_operand" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SHMEDIA)
+ emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ sh_emit_compare_and_branch (operands, SFmode);
+ DONE;
+})
+
+(define_expand "negsf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_unop (&gen_negsf2_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*negsf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fneg.s %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "negsf2_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (neg:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fneg %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "sqrtsf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH3E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH3E)
+ {
+ expand_sf_unop (&gen_sqrtsf2_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*sqrtsf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsqrt.s %1, %0"
+ [(set_attr "type" "fdiv_media")])
+
+(define_insn "sqrtsf2_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH3E"
+ "fsqrt %0"
+ [(set_attr "type" "fdiv")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "rsqrtsf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (div:SF (match_operand:SF 1 "immediate_operand" "i")
+ (sqrt:SF (match_operand:SF 2 "fp_arith_reg_operand" "0"))))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_FPU_ANY && TARGET_FSRRA
+ && operands[1] == CONST1_RTX (SFmode)"
+ "fsrra %0"
+ [(set_attr "type" "fsrra")
+ (set_attr "fp_mode" "single")])
+
+;; When the sincos pattern is defined, the builtin functions sin and cos
+;; will be expanded to the sincos pattern and one of the output values will
+;; remain unused.
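+;; As a rough sketch of the expansion below (the exact scale constant comes
+;; from sh_fsca_sf2int): the radian angle x is rescaled so that one full
+;; rotation maps to 2^16, truncated to an integer, and handed to fsca, which
+;; produces sine and cosine at once:
+;;   int q = (int) (x * (65536.0f / (2.0f * 3.14159265f)));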
+(define_expand "sincossf3"
+ [(set (match_operand:SF 0 "nonimmediate_operand")
+ (unspec:SF [(match_operand:SF 2 "fp_arith_reg_operand")] UNSPEC_FCOSA))
+ (set (match_operand:SF 1 "nonimmediate_operand")
+ (unspec:SF [(match_dup 2)] UNSPEC_FSINA))]
+ "TARGET_FPU_ANY && TARGET_FSCA"
+{
+ rtx scaled = gen_reg_rtx (SFmode);
+ rtx truncated = gen_reg_rtx (SImode);
+ rtx fsca = gen_reg_rtx (V2SFmode);
+ rtx scale_reg = force_reg (SFmode, sh_fsca_sf2int ());
+
+ emit_sf_insn (gen_mulsf3 (scaled, operands[2], scale_reg));
+ emit_sf_insn (gen_fix_truncsfsi2 (truncated, scaled));
+ emit_sf_insn (gen_fsca (fsca, truncated, sh_fsca_int2sf (),
+ get_fpscr_rtx ()));
+
+ emit_move_insn (operands[0], gen_rtx_SUBREG (SFmode, fsca, 4));
+ emit_move_insn (operands[1], gen_rtx_SUBREG (SFmode, fsca, 0));
+ DONE;
+})
+
+(define_insn_and_split "fsca"
+ [(set (match_operand:V2SF 0 "fp_arith_reg_operand" "=f")
+ (vec_concat:V2SF
+ (unspec:SF [(mult:SF
+ (float:SF (match_operand:SI 1 "fpul_fsca_operand" "y"))
+ (match_operand:SF 2 "fsca_scale_factor" "i"))
+ ] UNSPEC_FSINA)
+ (unspec:SF [(mult:SF (float:SF (match_dup 1)) (match_dup 2))
+ ] UNSPEC_FCOSA)))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "TARGET_FPU_ANY && TARGET_FSCA"
+ "fsca fpul,%d0"
+ "&& !fpul_operand (operands[1], SImode)"
+ [(const_int 0)]
+{
+ /* If operands[1] is something like (fix:SF (float:SF (reg:SI))) reduce it
+ to a simple reg, otherwise reload will have trouble reloading the
+ pseudo into fpul. */
+ rtx x = XEXP (operands[1], 0);
+ while (x != NULL_RTX && !fpul_operand (x, SImode))
+ {
+ gcc_assert (GET_CODE (x) == FIX || GET_CODE (x) == FLOAT);
+ x = XEXP (x, 0);
+ }
+
+ gcc_assert (x != NULL_RTX && fpul_operand (x, SImode));
+ emit_insn (gen_fsca (operands[0], x, operands[2], operands[3]));
+ DONE;
+}
+ [(set_attr "type" "fsca")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "abssf2"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+ (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "")))]
+ "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH2E)
+ {
+ expand_sf_unop (&gen_abssf2_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*abssf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fabs.s %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "abssf2_i"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (abs:SF (match_operand:SF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_SH2E"
+ "fabs %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "single")])
+
+(define_expand "adddf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_adddf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*adddf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fadd.d %1, %2, %0"
+ [(set_attr "type" "dfparith_media")])
+
+(define_insn "adddf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (plus:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fadd %2,%0"
+ [(set_attr "type" "dfp_arith")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "subdf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_subdf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*subdf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsub.d %1, %2, %0"
+ [(set_attr "type" "dfparith_media")])
+
+(define_insn "subdf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (minus:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fsub %2,%0"
+ [(set_attr "type" "dfp_arith")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "muldf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_muldf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*muldf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fmul.d %1, %2, %0"
+ [(set_attr "type" "dfmul_media")])
+
+(define_insn "muldf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (mult:DF (match_operand:DF 1 "fp_arith_reg_operand" "%0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fmul %2,%0"
+ [(set_attr "type" "dfp_mul")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "divdf3"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "")
+ (match_operand:DF 2 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_binop (&gen_divdf3_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*divdf3_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fdiv.d %1, %2, %0"
+ [(set_attr "type" "dfdiv_media")])
+
+(define_insn "divdf3_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (div:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 3 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fdiv %2,%0"
+ [(set_attr "type" "dfdiv")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "floatdidf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float:DF (match_operand:DI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.qd %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_expand "floatsidf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (float:DF (match_operand:SI 1 "fpul_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_floatsidf2_i (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*floatsidf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float:DF (match_operand:SI 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "float.ld %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "floatsidf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float:DF (match_operand:SI 1 "fpul_operand" "y")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "float %1,%0"
+ [(set_attr "type" "dfp_conv")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "fix_truncdfdi2"
+ [(set (match_operand:DI 0 "fp_arith_reg_dest" "=f")
+ (fix:DI (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.dq %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_expand "fix_truncdfsi2"
+ [(set (match_operand:SI 0 "fpul_operand" "")
+ (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_fix_truncdfsi2_i (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*fix_truncdfsi2_media"
+ [(set (match_operand:SI 0 "fp_arith_reg_operand" "=f")
+ (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "ftrc.dl %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "fix_truncdfsi2_i"
+ [(set (match_operand:SI 0 "fpul_operand" "=y")
+ (fix:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "ftrc %1,%0"
+ [(set_attr "type" "dfp_conv")
+ (set_attr "dfp_comp" "no")
+ (set_attr "fp_mode" "double")])
+
+;; ??? This pattern is used nowhere. fix_truncdfsi2 always expands to
+;; fix_truncdfsi2_i.
+;; (define_insn "fix_truncdfsi2_i4"
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
+;; (use (match_operand:PSI 2 "fpscr_operand" "c"))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; "#"
+;; [(set_attr "length" "4")
+;; (set_attr "fp_mode" "double")])
+;;
+;; (define_split
+;; [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+;; (fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
+;; (use (match_operand:PSI 2 "fpscr_operand" "c"))
+;; (clobber (reg:SI FPUL_REG))]
+;; "TARGET_SH4"
+;; [(parallel [(set (reg:SI FPUL_REG) (fix:SI (match_dup 1)))
+;; (use (match_dup 2))])
+;; (set (match_dup 0) (reg:SI FPUL_REG))])
+
+(define_insn "cmpgtdf_t"
+ [(set (reg:SI T_REG)
+ (gt:SI (match_operand:DF 0 "fp_arith_reg_operand" "f")
+ (match_operand:DF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcmp/gt %1,%0"
+ [(set_attr "type" "dfp_cmp")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "cmpeqdf_t"
+ [(set (reg:SI T_REG)
+ (eq:SI (match_operand:DF 0 "fp_arith_reg_operand" "f")
+ (match_operand:DF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcmp/eq %1,%0"
+ [(set_attr "type" "dfp_cmp")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "*ieee_ccmpeqdf_t"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (eq:SI (match_operand:DF 0 "fp_arith_reg_operand" "f")
+ (match_operand:DF 1 "fp_arith_reg_operand" "f"))))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "TARGET_IEEE && (TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+{
+ return output_ieee_ccmpeq (insn, operands);
+}
+ [(set_attr "length" "4")
+ (set_attr "fp_mode" "double")])
+
+(define_insn "cmpeqdf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (eq:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpeq.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgtdf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (gt:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpgt.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpgedf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (ge:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpge.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_insn "cmpundf_media"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unordered:SI (match_operand:DF 1 "fp_arith_reg_operand" "f")
+ (match_operand:DF 2 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcmpun.d %1,%2,%0"
+ [(set_attr "type" "fcmp_media")])
+
+(define_expand "cbranchdf4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "sh_float_comparison_operator"
+ [(match_operand:DF 1 "arith_operand" "")
+ (match_operand:DF 2 "arith_operand" "")])
+ (match_operand 3 "" "")
+ (pc)))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SHMEDIA)
+ emit_jump_insn (gen_cbranchfp4_media (operands[0], operands[1], operands[2],
+ operands[3]));
+ else
+ sh_emit_compare_and_branch (operands, DFmode);
+ DONE;
+})
+
+(define_expand "negdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand")
+ (neg:DF (match_operand:DF 1 "fp_arith_reg_operand")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_unop (&gen_negdf2_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*negdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fneg.d %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "negdf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (neg:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fneg %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "sqrtdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand")
+ (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_unop (&gen_sqrtdf2_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*sqrtdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fsqrt.d %1, %0"
+ [(set_attr "type" "dfdiv_media")])
+
+(define_insn "sqrtdf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (sqrt:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fsqrt %0"
+ [(set_attr "type" "dfdiv")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "absdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand")
+ (abs:DF (match_operand:DF 1 "fp_arith_reg_operand")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ expand_df_unop (&gen_absdf2_i, operands);
+ DONE;
+ }
+})
+
+(define_insn "*absdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fabs.d %1, %0"
+ [(set_attr "type" "fmove_media")])
+
+(define_insn "absdf2_i"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (abs:DF (match_operand:DF 1 "fp_arith_reg_operand" "0")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fabs %0"
+ [(set_attr "type" "fmove")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "extendsfdf2"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "")
+ (float_extend:DF (match_operand:SF 1 "fpul_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_extendsfdf2_i4 (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*extendsfdf2_media"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcnv.sd %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "extendsfdf2_i4"
+ [(set (match_operand:DF 0 "fp_arith_reg_operand" "=f")
+ (float_extend:DF (match_operand:SF 1 "fpul_operand" "y")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcnvsd %1,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "double")])
+
+(define_expand "truncdfsf2"
+ [(set (match_operand:SF 0 "fpul_operand" "")
+ (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "")))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE) || TARGET_SHMEDIA_FPU"
+{
+ if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
+ {
+ emit_df_insn (gen_truncdfsf2_i4 (operands[0], operands[1],
+ get_fpscr_rtx ()));
+ DONE;
+ }
+})
+
+(define_insn "*truncdfsf2_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))]
+ "TARGET_SHMEDIA_FPU"
+ "fcnv.ds %1, %0"
+ [(set_attr "type" "dfpconv_media")])
+
+(define_insn "truncdfsf2_i4"
+ [(set (match_operand:SF 0 "fpul_operand" "=y")
+ (float_truncate:SF (match_operand:DF 1 "fp_arith_reg_operand" "f")))
+ (use (match_operand:PSI 2 "fpscr_operand" "c"))]
+ "(TARGET_SH4 || TARGET_SH2A_DOUBLE)"
+ "fcnvds %1,%0"
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "double")])
+
+;; -------------------------------------------------------------------------
+;; Bit field extract patterns.
+;; -------------------------------------------------------------------------
+
+;; These give better code for packed bitfields, because they allow
+;; auto-increment addresses to be generated.
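+;; For instance (illustrative C only, assuming big-endian SH1), a store into
+;; a packed 24-bit field such as
+;;   struct __attribute__((packed)) s { unsigned f : 24; };
+;;   void set (struct s *p, unsigned v) { p->f = v; }
+;; can be handled by the expander below as a short sequence of byte stores.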
+
+(define_expand "insv"
+ [(set (zero_extract:SI (match_operand:QI 0 "memory_operand" "")
+ (match_operand:SI 1 "immediate_operand" "")
+ (match_operand:SI 2 "immediate_operand" ""))
+ (match_operand:SI 3 "general_operand" ""))]
+ "TARGET_SH1 && TARGET_BIG_ENDIAN"
+{
+ rtx addr_target, orig_address, shift_reg, qi_val;
+ HOST_WIDE_INT bitsize, size, v = 0;
+ rtx x = operands[3];
+
+ if (TARGET_SH2A && TARGET_BITOPS
+ && (satisfies_constraint_Sbw (operands[0])
+ || satisfies_constraint_Sbv (operands[0]))
+ && satisfies_constraint_M (operands[1])
+ && satisfies_constraint_K03 (operands[2]))
+ {
+ if (satisfies_constraint_N (operands[3]))
+ {
+ emit_insn (gen_bclr_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ else if (satisfies_constraint_M (operands[3]))
+ {
+ emit_insn (gen_bset_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ else if ((REG_P (operands[3]) && REGNO (operands[3]) == T_REG)
+ && satisfies_constraint_M (operands[1]))
+ {
+ emit_insn (gen_bst_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ else if (REG_P (operands[3])
+ && satisfies_constraint_M (operands[1]))
+ {
+ emit_insn (gen_bld_reg (operands[3], const0_rtx));
+ emit_insn (gen_bst_m2a (operands[0], operands[2]));
+ DONE;
+ }
+ }
+ /* ??? expmed doesn't care for non-register predicates. */
+ if (! memory_operand (operands[0], VOIDmode)
+ || ! immediate_operand (operands[1], VOIDmode)
+ || ! immediate_operand (operands[2], VOIDmode)
+ || ! general_operand (x, VOIDmode))
+ FAIL;
+ /* If this isn't a 16 / 24 / 32 bit field, or if
+ it doesn't start on a byte boundary, then fail. */
+ bitsize = INTVAL (operands[1]);
+ if (bitsize < 16 || bitsize > 32 || bitsize % 8 != 0
+ || (INTVAL (operands[2]) % 8) != 0)
+ FAIL;
+
+ size = bitsize / 8;
+ orig_address = XEXP (operands[0], 0);
+ shift_reg = gen_reg_rtx (SImode);
+ if (CONST_INT_P (x))
+ {
+ v = INTVAL (x);
+ qi_val = force_reg (QImode, GEN_INT (trunc_int_for_mode (v, QImode)));
+ }
+ else
+ {
+ emit_insn (gen_movsi (shift_reg, operands[3]));
+ qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3);
+ }
+ addr_target = copy_addr_to_reg (plus_constant (Pmode,
+ orig_address, size - 1));
+
+ operands[0] = replace_equiv_address (operands[0], addr_target);
+ emit_insn (gen_movqi (operands[0], qi_val));
+
+ while (size -= 1)
+ {
+ if (CONST_INT_P (x))
+ qi_val
+ = force_reg (QImode, GEN_INT (trunc_int_for_mode (v >>= 8, QImode)));
+ else
+ {
+ emit_insn (gen_lshrsi3_k (shift_reg, shift_reg, GEN_INT (8)));
+ qi_val = gen_rtx_SUBREG (QImode, shift_reg, 3);
+ }
+ emit_insn (gen_addsi3 (addr_target, addr_target, constm1_rtx));
+ emit_insn (gen_movqi (operands[0], qi_val));
+ }
+
+ DONE;
+})
+
+(define_insn "movua"
+ [(set (match_operand:SI 0 "register_operand" "=z")
+ (unspec:SI [(match_operand:BLK 1 "unaligned_load_operand" "Sua>")]
+ UNSPEC_MOVUA))]
+ "TARGET_SH4A_ARCH"
+ "movua.l %1,%0"
+ [(set_attr "type" "movua")])
+
+;; We shouldn't need this, but cse replaces increments with references
+;; to other regs before flow has a chance to create post_inc
+;; addressing modes, and only postreload's cse_move2add brings the
+;; increments back to a usable form.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (match_operand:SI 1 "register_operand" ""))
+ (const_int 32) (const_int 0)))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "TARGET_SH4A_ARCH && REGNO (operands[0]) != REGNO (operands[1])"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (mem:SI (post_inc:SI
+ (match_operand:SI 1 "register_operand" "")))
+ (const_int 32) (const_int 0)))]
+ "")
+
+(define_expand "extv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_SH4A_ARCH || TARGET_SH2A"
+{
+ if (TARGET_SH2A && TARGET_BITOPS
+ && (satisfies_constraint_Sbw (operands[1])
+ || satisfies_constraint_Sbv (operands[1]))
+ && satisfies_constraint_M (operands[2])
+ && satisfies_constraint_K03 (operands[3]))
+ {
+ emit_insn (gen_bldsign_m2a (operands[1], operands[3]));
+ if (REGNO (operands[0]) != T_REG)
+ emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG)));
+ DONE;
+ }
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == 0
+ && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32)
+ {
+ rtx src = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (src, 4);
+ emit_insn (gen_movua (operands[0], src));
+ DONE;
+ }
+
+ FAIL;
+})
+
+(define_expand "extzv"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extract:SI (match_operand:QI 1 "unaligned_load_operand" "")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")))]
+ "TARGET_SH4A_ARCH || TARGET_SH2A"
+{
+ if (TARGET_SH2A && TARGET_BITOPS
+ && (satisfies_constraint_Sbw (operands[1])
+ || satisfies_constraint_Sbv (operands[1]))
+ && satisfies_constraint_M (operands[2])
+ && satisfies_constraint_K03 (operands[3]))
+ {
+ emit_insn (gen_bld_m2a (operands[1], operands[3]));
+ if (REGNO (operands[0]) != T_REG)
+ emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG)));
+ DONE;
+ }
+ if (TARGET_SH4A_ARCH
+ && INTVAL (operands[2]) == 32
+ && INTVAL (operands[3]) == 0
+ && MEM_P (operands[1]) && MEM_ALIGN (operands[1]) < 32)
+ {
+ rtx src = adjust_address (operands[1], BLKmode, 0);
+ set_mem_size (src, 4);
+ emit_insn (gen_movua (operands[0], src));
+ DONE;
+ }
+
+ FAIL;
+})
+
+;; SH2A instructions for bitwise operations.
+;; FIXME: Convert multiple instruction insns to insn_and_split.
+;; FIXME: Use iterators to fold at least and,xor,or insn variations.
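+;; Illustrative C only: with -mbitops on SH2A, a statement such as
+;;   flags &= ~(1 << 3);   /* flags is a byte-sized object in memory */
+;; can be emitted as a single bclr.b, and flags |= (1 << 3) as a bset.b.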
+
+;; Clear a bit in a memory location.
+(define_insn "bclr_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (and:QI
+ (not:QI (ashift:QI (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "K03,K03")))
+ (match_dup 0)))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bclr.b %1,%0
+ bclr.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bclrmem_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (and:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "Psz,Psz")))]
+ "TARGET_SH2A && satisfies_constraint_Psz (operands[1]) && TARGET_BITOPS"
+ "@
+ bclr.b %W1,%0
+ bclr.b %W1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Set a bit in a memory location.
+(define_insn "bset_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (ior:QI
+ (ashift:QI (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "K03,K03"))
+ (match_dup 0)))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bset.b %1,%0
+ bset.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bsetmem_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,Sbv")
+ (ior:QI (match_dup 0)
+ (match_operand:QI 1 "const_int_operand" "Pso,Pso")))]
+ "TARGET_SH2A && satisfies_constraint_Pso (operands[1]) && TARGET_BITOPS"
+ "@
+ bset.b %V1,%0
+ bset.b %V1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Transfer the contents of the T bit to a specified bit of memory.
+(define_insn "bst_m2a"
+ [(set (match_operand:QI 0 "bitwise_memory_operand" "+Sbw,m")
+ (if_then_else (eq (reg:SI T_REG) (const_int 0))
+ (and:QI
+ (not:QI (ashift:QI (const_int 1)
+ (match_operand:QI 1 "const_int_operand" "K03,K03")))
+ (match_dup 0))
+ (ior:QI
+ (ashift:QI (const_int 1) (match_dup 1))
+ (match_dup 0))))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bst.b %1,%0
+ bst.b %1,@(0,%t0)"
+ [(set_attr "length" "4")])
+
+;; Store a specified bit of memory in the T bit.
+(define_insn "bld_m2a"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03")))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bld.b %1,%0
+ bld.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Store a specified bit of memory in the T bit.
+(define_insn "bldsign_m2a"
+ [(set (reg:SI T_REG)
+ (sign_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03")))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bld.b %1,%0
+ bld.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+;; Store a specified bit of the LSB 8 bits of a register in the T bit.
+(define_insn "bld_reg"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (match_operand:SI 0 "arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03")))]
+ "TARGET_SH2A && satisfies_constraint_K03 (operands[1])"
+ "bld %1,%0")
+
+(define_insn "*bld_regqi"
+ [(set (reg:SI T_REG)
+ (zero_extract:SI (match_operand:QI 0 "arith_reg_operand" "r")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03")))]
+ "TARGET_SH2A && satisfies_constraint_K03 (operands[1])"
+ "bld %1,%0")
+
+;; Take logical and of a specified bit of memory with the T bit and
+;; store its result in the T bit.
+(define_insn "band_m2a"
+ [(set (reg:SI T_REG)
+ (and:SI (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03"))))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ band.b %1,%0
+ band.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bandreg_m2a"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (and:SI (zero_extract:SI
+ (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "K03,K03"))
+ (match_operand:SI 3 "register_operand" "r,r")))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[2])"
+{
+ static const char* alt[] =
+ {
+ "band.b %2,%1" "\n"
+ " movt %0",
+
+ "band.b %2,@(0,%t1)" "\n"
+ " movt %0"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "length" "6,6")])
+
+;; Take logical or of a specified bit of memory with the T bit and
+;; store its result in the T bit.
+(define_insn "bor_m2a"
+ [(set (reg:SI T_REG)
+ (ior:SI (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03"))))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bor.b %1,%0
+ bor.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "borreg_m2a"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (ior:SI (zero_extract:SI
+ (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "K03,K03"))
+		(match_operand:SI 3 "register_operand" "r,r")))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[2])"
+{
+ static const char* alt[] =
+ {
+ "bor.b %2,%1" "\n"
+ " movt %0",
+
+ "bor.b %2,@(0,%t1)" "\n"
+ " movt %0"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "length" "6,6")])
+
+;; Take exclusive or of a specified bit of memory with the T bit and
+;; store its result in the T bit.
+(define_insn "bxor_m2a"
+ [(set (reg:SI T_REG)
+ (xor:SI (reg:SI T_REG)
+ (zero_extract:SI
+ (match_operand:QI 0 "bitwise_memory_operand" "Sbw,m")
+ (const_int 1)
+ (match_operand 1 "const_int_operand" "K03,K03"))))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[1])"
+ "@
+ bxor.b %1,%0
+ bxor.b %1,@(0,%t0)"
+ [(set_attr "length" "4,4")])
+
+(define_insn "bxorreg_m2a"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (xor:SI (zero_extract:SI
+ (match_operand:QI 1 "bitwise_memory_operand" "Sbw,Sbv")
+ (const_int 1)
+ (match_operand 2 "const_int_operand" "K03,K03"))
+		(match_operand:SI 3 "register_operand" "r,r")))]
+ "TARGET_SH2A && TARGET_BITOPS && satisfies_constraint_K03 (operands[2])"
+{
+ static const char* alt[] =
+ {
+ "bxor.b %2,%1" "\n"
+ " movt %0",
+
+ "bxor.b %2,@(0,%t1)" "\n"
+ " movt %0"
+ };
+ return alt[which_alternative];
+}
+ [(set_attr "length" "6,6")])
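+
+;; As a usage sketch (assuming -mbitops and that 'flags' ends up in a form
+;; matching the Sbw/Sbv memory constraints), C code along these lines would
+;; typically exercise the bit manipulation insns above on SH2A:
+;;	volatile unsigned char flags;
+;;	void f (void)
+;;	{
+;;	  flags |= 1 << 2;		/* bset.b */
+;;	  flags &= ~(1 << 5);		/* bclr.b */
+;;	  if (flags & (1 << 7))		/* bld.b, result in T */
+;;	    flags |= 1 << 0;
+;;	}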
+
+;; -------------------------------------------------------------------------
+;; Peepholes
+;; -------------------------------------------------------------------------
+;; This matches cases where the bit in a memory location is set.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand")))
+ (set (match_dup 0)
+ (ior:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_operand")))
+ (set (match_dup 1)
+ (match_operand 3 "arith_reg_operand"))]
+ "TARGET_SH2A && TARGET_BITOPS
+ && satisfies_constraint_Pso (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])"
+ [(set (match_dup 1)
+ (ior:QI (match_dup 1) (match_dup 2)))]
+ "")
+
+;; This matches cases where the bit in a memory location is cleared.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:QI 1 "bitwise_memory_operand")))
+ (set (match_dup 0)
+ (and:SI (match_dup 0)
+ (match_operand:SI 2 "const_int_operand")))
+ (set (match_dup 1)
+ (match_operand 3 "arith_reg_operand"))]
+ "TARGET_SH2A && TARGET_BITOPS
+ && satisfies_constraint_Psz (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])"
+ [(set (match_dup 1)
+ (and:QI (match_dup 1) (match_dup 2)))]
+ "")
+
+;; This matches cases where a stack pointer increment at the start of the
+;; epilogue combines with a stack slot read loading the return value.
+(define_peephole
+ [(set (match_operand:SI 0 "arith_reg_operand" "")
+ (mem:SI (match_operand:SI 1 "arith_reg_operand" "")))
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))]
+ "TARGET_SH1 && REGNO (operands[1]) != REGNO (operands[0])"
+ "mov.l @%1+,%0")
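+
+;; For illustration, this turns an epilogue tail such as
+;;	mov.l	@r15,r0
+;;	add	#4,r15
+;; into the single post-increment load
+;;	mov.l	@r15+,r0
+;; (the register numbers are only an example; any distinct pair matches).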
+
+;; See the comment on the dt combiner pattern above.
+(define_peephole
+ [(set (match_operand:SI 0 "arith_reg_operand" "=r")
+ (plus:SI (match_dup 0)
+ (const_int -1)))
+ (set (reg:SI T_REG)
+ (eq:SI (match_dup 0) (const_int 0)))]
+ "TARGET_SH2"
+ "dt %0")
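+
+;; For illustration, a decrement-and-test sequence such as
+;;	add	#-1,r1
+;;	tst	r1,r1
+;; collapses into the single SH2 decrement-and-test insn
+;;	dt	r1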
+
+;; The following peepholes fold load sequences for which reload was not
+;; able to generate a displacement addressing move insn.
+;; This can happen when reload has to transform a move insn
+;; without displacement into one with displacement. Or when reload can't
+;; fit a displacement into the insn's constraints. In the latter case, the
+;; load destination reg remains at r0, which reload compensates by inserting
+;; another mov insn.
+
+;; Fold sequence:
+;; mov #54,r0
+;; mov.{b,w} @(r0,r15),r0
+;; mov r0,r3
+;; into:
+;; mov.{b,w} @(54,r15),r3
+;;
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "arith_reg_operand" "")))))
+ (set (match_operand:QI 4 "arith_reg_dest" "")
+ (match_operand:QI 5 "arith_reg_operand" ""))]
+ "TARGET_SH2A
+ && sh_legitimate_index_p (QImode, operands[1], true, true)
+ && REGNO (operands[2]) == REGNO (operands[5])
+ && peep2_reg_dead_p (3, operands[5])"
+ [(set (match_dup 4) (mem:QI (plus:SI (match_dup 3) (match_dup 1))))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:HI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "arith_reg_operand" "")))))
+ (set (match_operand:HI 4 "arith_reg_dest" "")
+ (match_operand:HI 5 "arith_reg_operand" ""))]
+ "TARGET_SH2A
+ && sh_legitimate_index_p (HImode, operands[1], true, true)
+ && REGNO (operands[2]) == REGNO (operands[5])
+ && peep2_reg_dead_p (3, operands[5])"
+ [(set (match_dup 4) (mem:HI (plus:SI (match_dup 3) (match_dup 1))))]
+ "")
+
+;; Fold sequence:
+;; mov #54,r0
+;; mov.{b,w} @(r0,r15),r1
+;; into:
+;; mov.{b,w} @(54,r15),r1
+;;
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "arith_reg_operand" "")))))]
+ "TARGET_SH2A
+ && sh_legitimate_index_p (QImode, operands[1], true, true)
+ && (peep2_reg_dead_p (2, operands[0])
+ || REGNO (operands[0]) == REGNO (operands[2]))"
+ [(set (match_dup 2)
+ (sign_extend:SI (mem:QI (plus:SI (match_dup 3) (match_dup 1)))))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (match_operand:SI 1 "const_int_operand" ""))
+ (set (match_operand:SI 2 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:HI (plus:SI (match_dup 0)
+ (match_operand:SI 3 "arith_reg_operand" "")))))]
+ "TARGET_SH2A
+ && sh_legitimate_index_p (HImode, operands[1], true, true)
+ && (peep2_reg_dead_p (2, operands[0])
+ || REGNO (operands[0]) == REGNO (operands[2]))"
+ [(set (match_dup 2)
+ (sign_extend:SI (mem:HI (plus:SI (match_dup 3) (match_dup 1)))))]
+ "")
+
+;; Fold sequence:
+;; mov.{b,w} @(r0,r15),r0
+;; mov r0,r3
+;; into:
+;; mov.{b,w} @(r0,r15),r3
+;;
+;; This can happen when initially a displacement address is picked, where
+;; the destination reg is fixed to r0, and then the address is transformed
+;; into 'r0 + reg'.
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:QI (plus:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))))
+ (set (match_operand:QI 3 "arith_reg_dest" "")
+ (match_operand:QI 4 "arith_reg_operand" ""))]
+ "TARGET_SH1
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (mem:QI (plus:SI (match_dup 1) (match_dup 2))))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (sign_extend:SI
+ (mem:HI (plus:SI (match_operand:SI 1 "arith_reg_operand" "")
+ (match_operand:SI 2 "arith_reg_operand" "")))))
+ (set (match_operand:HI 3 "arith_reg_dest" "")
+ (match_operand:HI 4 "arith_reg_operand" ""))]
+ "TARGET_SH1
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && peep2_reg_dead_p (2, operands[0])"
+ [(set (match_dup 3)
+ (mem:HI (plus:SI (match_dup 1) (match_dup 2))))]
+ "")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 2 "general_movsrc_operand" ""))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2]) && REGNO (operands[2]) < 16)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) < 16))
+ && reg_unused_after (operands[0], insn)"
+ "mov.l %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+   (set (match_operand:SF 2 "general_movdst_operand" "")
+	(mem:SF (match_dup 0)))]
+ "TARGET_SH1 && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2]) && REGNO (operands[2]) < 16)
+ || (GET_CODE (operands[2]) == SUBREG
+ && REGNO (SUBREG_REG (operands[2])) < 16))
+ && reg_unused_after (operands[0], insn)"
+ "mov.l @(%0,%1),%2")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 2 "general_movsrc_operand" ""))]
+ "TARGET_SH2E && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2])
+ && FP_OR_XD_REGISTER_P (REGNO (operands[2])))
+ || (GET_CODE (operands[2]) == SUBREG
+ && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2])))))
+ && reg_unused_after (operands[0], insn)"
+ "fmov{.s|} %2,@(%0,%1)")
+
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_dup 0) (match_operand:SI 1 "register_operand" "r")))
+   (set (match_operand:SF 2 "general_movdst_operand" "")
+	(mem:SF (match_dup 0)))]
+ "TARGET_SH2E && REGNO (operands[0]) == 0
+ && ((REG_P (operands[2])
+ && FP_OR_XD_REGISTER_P (REGNO (operands[2])))
+ || (GET_CODE (operands[2]) == SUBREG
+ && FP_OR_XD_REGISTER_P (REGNO (SUBREG_REG (operands[2])))))
+ && reg_unused_after (operands[0], insn)"
+ "fmov{.s|} @(%0,%1),%2")
+
+;; Switch to a new stack with its address in sp_switch (a SYMBOL_REF).
+(define_insn "sp_switch_1"
+ [(set (reg:SI SP_REG) (unspec_volatile [(match_operand:SI 0 "" "")]
+ UNSPECV_SP_SWITCH_B))]
+ "TARGET_SH1"
+{
+ return "mov.l r0,@-r15" "\n"
+ " mov.l %0,r0" "\n"
+ " mov.l @r0,r0" "\n"
+ " mov.l r15,@-r0" "\n"
+ " mov r0,r15";
+}
+ [(set_attr "length" "10")])
+
+;; Switch back to the original stack for interrupt functions with the
+;; sp_switch attribute.
+(define_insn "sp_switch_2"
+ [(unspec_volatile [(const_int 0)]
+ UNSPECV_SP_SWITCH_E)]
+ "TARGET_SH1"
+{
+ return "mov.l @r15,r15" "\n"
+ " mov.l @r15+,r0";
+}
+ [(set_attr "length" "4")])
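+
+;; A minimal usage sketch for the sp_switch attribute that drives these two
+;; patterns (the variable and function names here are only examples):
+;;	void *alt_stack;
+;;	void handler (void)
+;;	  __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));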
+
+;; -------------------------------------------------------------------------
+;; Integer vector moves
+;; -------------------------------------------------------------------------
+
+(define_expand "movv8qi"
+ [(set (match_operand:V8QI 0 "general_movdst_operand" "")
+ (match_operand:V8QI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+{
+ prepare_move_operands (operands, V8QImode);
+})
+
+(define_insn "movv8qi_i"
+ [(set (match_operand:V8QI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V8QI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V8QImode)
+ || sh_register_operand (operands[1], V8QImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")])
+
+(define_split
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "")
+ (subreg:V8QI (const_int 0) 0))]
+ "TARGET_SHMEDIA"
+ [(set (match_dup 0)
+ (const_vector:V8QI [(const_int 0) (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)]))])
+
+(define_split
+ [(set (match_operand 0 "arith_reg_dest" "")
+ (match_operand 1 "sh_rep_vec" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && sh_vector_mode_supported_p (GET_MODE (operands[0]))
+ && GET_MODE_SIZE (GET_MODE (operands[0])) == 8
+ && (XVECEXP (operands[1], 0, 0) != const0_rtx
+ || XVECEXP (operands[1], 0, 1) != const0_rtx)
+ && (XVECEXP (operands[1], 0, 0) != constm1_rtx
+ || XVECEXP (operands[1], 0, 1) != constm1_rtx)"
+ [(set (match_dup 0) (match_dup 1))
+ (match_dup 2)]
+{
+ int unit_size = GET_MODE_UNIT_SIZE (GET_MODE (operands[1]));
+ rtx elt1 = XVECEXP (operands[1], 0, 1);
+
+ if (unit_size > 2)
+ operands[2] = gen_mshflo_l (operands[0], operands[0], operands[0]);
+ else
+ {
+ if (unit_size < 2)
+ operands[0] = gen_rtx_REG (V4HImode, true_regnum (operands[0]));
+ operands[2] = gen_mperm_w0 (operands[0], operands[0]);
+ }
+ operands[0] = gen_rtx_REG (DImode, true_regnum (operands[0]));
+ operands[1] = XVECEXP (operands[1], 0, 0);
+ if (unit_size < 2)
+ {
+ if (CONST_INT_P (operands[1]) && CONST_INT_P (elt1))
+ operands[1]
+ = GEN_INT (TARGET_LITTLE_ENDIAN
+ ? (INTVAL (operands[1]) & 0xff) + (INTVAL (elt1) << 8)
+ : (INTVAL (operands[1]) << 8) + (INTVAL (elt1) & 0xff));
+ else
+ {
+ operands[0] = gen_rtx_REG (V2QImode, true_regnum (operands[0]));
+ operands[1]
+ = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, operands[1], elt1));
+ }
+ }
+})
+
+(define_split
+ [(set (match_operand 0 "arith_reg_dest" "")
+ (match_operand 1 "sh_const_vec" ""))]
+ "TARGET_SHMEDIA && reload_completed
+ && GET_MODE (operands[0]) == GET_MODE (operands[1])
+ && sh_vector_mode_supported_p (GET_MODE (operands[0]))"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx v = operands[1];
+ enum machine_mode new_mode
+ = mode_for_size (GET_MODE_BITSIZE (GET_MODE (v)), MODE_INT, 0);
+
+ operands[0] = gen_rtx_REG (new_mode, true_regnum (operands[0]));
+ operands[1]
+ = simplify_subreg (new_mode, operands[1], GET_MODE (operands[1]), 0);
+})
+
+(define_expand "movv2hi"
+ [(set (match_operand:V2HI 0 "general_movdst_operand" "")
+ (match_operand:V2HI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+{
+ prepare_move_operands (operands, V2HImode);
+})
+
+(define_insn "movv2hi_i"
+ [(set (match_operand:V2HI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V2HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V2HImode)
+ || sh_register_operand (operands[1], V2HImode))"
+ "@
+ add.l %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.l %m1, %0
+ st%M0.l %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")
+ (set (attr "highpart")
+ (cond [(match_test "sh_contains_memref_p (insn)")
+ (const_string "user")]
+ (const_string "ignore")))])
+
+(define_expand "movv4hi"
+ [(set (match_operand:V4HI 0 "general_movdst_operand" "")
+ (match_operand:V4HI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+{
+ prepare_move_operands (operands, V4HImode);
+})
+
+(define_insn "movv4hi_i"
+ [(set (match_operand:V4HI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V4HI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V4HImode)
+ || sh_register_operand (operands[1], V4HImode))"
+ "@
+ add %1, r63, %0
+ movi %1, %0
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")
+ (set_attr "highpart" "depend")])
+
+(define_expand "movv2si"
+ [(set (match_operand:V2SI 0 "general_movdst_operand" "")
+ (match_operand:V2SI 1 "general_movsrc_operand" ""))]
+ "TARGET_SHMEDIA"
+{
+ prepare_move_operands (operands, V2SImode);
+})
+
+(define_insn "movv2si_i"
+ [(set (match_operand:V2SI 0 "general_movdst_operand" "=r,r,r,rl,m")
+ (match_operand:V2SI 1 "general_movsrc_operand" "r,I16CssZ,nW,m,rlZ"))]
+ "TARGET_SHMEDIA
+ && (register_operand (operands[0], V2SImode)
+ || sh_register_operand (operands[1], V2SImode))"
+ "@
+ add %1, r63, %0
+ #
+ #
+ ld%M1.q %m1, %0
+ st%M0.q %m0, %N1"
+ [(set_attr "type" "arith_media,arith_media,*,load_media,store_media")
+ (set_attr "length" "4,4,16,4,4")
+ (set_attr "highpart" "depend")])
+
+;; -------------------------------------------------------------------------
+;; Multimedia Intrinsics
+;; -------------------------------------------------------------------------
+
+(define_insn "absv2si2"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (abs:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mabs.l %1, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "absv4hi2"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (abs:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mabs.w %1, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "addv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madd.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "addv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madd.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn_and_split "addv2hi3"
+ [(set (match_operand:V2HI 0 "arith_reg_dest" "=r")
+ (plus:V2HI (match_operand:V2HI 1 "extend_reg_operand" "%r")
+ (match_operand:V2HI 2 "extend_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "#"
+ "TARGET_SHMEDIA"
+ [(const_int 0)]
+{
+ rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0);
+ rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0);
+ rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0);
+ rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0);
+ rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0);
+
+ emit_insn (gen_addv4hi3 (v4hi_dst, src0, src1));
+ emit_insn (gen_truncdisi2 (si_dst, di_dst));
+ DONE;
+}
+ [(set_attr "highpart" "must_split")])
+
+(define_insn "ssaddv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_plus:V2SI (match_operand:V2SI 1 "arith_reg_operand" "%r")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madds.l %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "usaddv8qi3"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (us_plus:V8QI (match_operand:V8QI 1 "arith_reg_operand" "%r")
+ (match_operand:V8QI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madds.ub %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ssaddv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_plus:V4HI (match_operand:V4HI 1 "arith_reg_operand" "%r")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "madds.w %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpeqv8qi"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (neg:V8QI (eq:V8QI
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpeq.b %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpeqv2si"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (neg:V2SI (eq:V2SI
+ (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpeq.l %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpeqv4hi"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (neg:V4HI (eq:V4HI
+ (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpeq.w %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpgtuv8qi"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (neg:V8QI (gtu:V8QI
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpgt.ub %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpgtv2si"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (neg:V2SI (gt:V2SI
+ (match_operand:V2SI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpgt.l %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "negcmpgtv4hi"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (neg:V4HI (gt:V4HI
+ (match_operand:V4HI 1 "arith_reg_or_0_operand" "%rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcmpgt.w %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mcmv"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_operand" "r"))
+ (and:DI (match_operand:DI 3 "arith_reg_operand" "0")
+ (not:DI (match_dup 2)))))]
+ "TARGET_SHMEDIA"
+ "mcmv %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mcnvs_lw"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_concat:V4HI
+ (ss_truncate:V2HI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ"))
+ (ss_truncate:V2HI
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcnvs.lw %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_insn "mcnvs_wb"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_concat:V8QI
+ (ss_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ"))
+ (ss_truncate:V4QI
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcnvs.wb %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_insn "mcnvs_wub"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_concat:V8QI
+ (us_truncate:V4QI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ"))
+ (us_truncate:V4QI
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mcnvs.wub %N1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_insn "mextr_rl"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 3 "mextr_bit_offset" "i"))
+ (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 4 "mextr_bit_offset" "i"))))]
+ "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+{
+ static char templ[21];
+ sprintf (templ, "mextr%d %%N1, %%N2, %%0",
+ (int) INTVAL (operands[3]) >> 3);
+ return templ;
+}
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*mextr_lr"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 3 "mextr_bit_offset" "i"))
+ (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (match_operand:HI 4 "mextr_bit_offset" "i"))))]
+ "TARGET_SHMEDIA && INTVAL (operands[3]) + INTVAL (operands[4]) == 64"
+{
+ static char templ[21];
+ sprintf (templ, "mextr%d %%N2, %%N1, %%0",
+ (int) INTVAL (operands[4]) >> 3);
+ return templ;
+}
+ [(set_attr "type" "arith_media")])
+
+; mextrN can be modelled with vec_select / vec_concat, but the selection
+; vector then varies depending on endianness.
+(define_expand "mextr1"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (1 * 8), GEN_INT (7 * 8)));
+ DONE;
+})
+
+(define_expand "mextr2"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (2 * 8), GEN_INT (6 * 8)));
+ DONE;
+})
+
+(define_expand "mextr3"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (3 * 8), GEN_INT (5 * 8)));
+ DONE;
+})
+
+(define_expand "mextr4"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (4 * 8), GEN_INT (4 * 8)));
+ DONE;
+})
+
+(define_expand "mextr5"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (5 * 8), GEN_INT (3 * 8)));
+ DONE;
+})
+
+(define_expand "mextr6"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (6 * 8), GEN_INT (2 * 8)));
+ DONE;
+})
+
+(define_expand "mextr7"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mextr_rl (operands[0], operands[1], operands[2],
+ GEN_INT (7 * 8), GEN_INT (1 * 8)));
+ DONE;
+})
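+
+;; Taken together with mextr_rl above, the shift counts mean that mextrN
+;; computes
+;;	result = (op1 >> (8 * N)) | (op2 << (64 - 8 * N))
+;; i.e. a byte-aligned 64-bit window extracted from the 128-bit
+;; concatenation of the two source operands.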
+
+(define_expand "mmacfx_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2HI 1 "extend_reg_operand" "")
+ (match_operand:V2HI 2 "extend_reg_operand" "")
+ (match_operand:V2SI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mmacfx_wl_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+})
+
+;; This could be "highpart ignore" if it only had inputs 2 or 3, but input 1
+;; is "depend".
+(define_insn "mmacfx_wl_i"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_plus:V2SI
+ (match_operand:V2SI 1 "arith_reg_operand" "0")
+ (ss_truncate:V2SI
+ (ashift:V2DI
+ (sign_extend:V2DI
+ (mult:V2SI
+ (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r"))
+ (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r"))))
+ (const_int 1)))))]
+ "TARGET_SHMEDIA"
+ "mmacfx.wl %2, %3, %0"
+ [(set_attr "type" "mac_media")
+ (set_attr "highpart" "depend")])
+
+(define_expand "mmacnfx_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2HI 1 "extend_reg_operand" "")
+ (match_operand:V2HI 2 "extend_reg_operand" "")
+ (match_operand:V2SI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mmacnfx_wl_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mmacnfx_wl_i"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_minus:V2SI
+ (match_operand:V2SI 1 "arith_reg_operand" "0")
+ (ss_truncate:V2SI
+ (ashift:V2DI
+ (sign_extend:V2DI
+ (mult:V2SI
+ (sign_extend:V2SI (match_operand:V2HI 2 "extend_reg_operand" "r"))
+ (sign_extend:V2SI (match_operand:V2HI 3 "extend_reg_operand" "r"))))
+ (const_int 1)))))]
+ "TARGET_SHMEDIA"
+ "mmacnfx.wl %2, %3, %0"
+ [(set_attr "type" "mac_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mulv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (mult:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mmul.l %1, %2, %0"
+ [(set_attr "type" "d2mpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mulv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (mult:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mmul.w %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mmulfx_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V2SI
+ (ashiftrt:V2DI
+ (mult:V2DI
+ (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r"))
+ (sign_extend:V2DI (match_operand:V2SI 2 "arith_reg_operand" "r")))
+ (const_int 31))))]
+ "TARGET_SHMEDIA"
+ "mmulfx.l %1, %2, %0"
+ [(set_attr "type" "d2mpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mmulfx_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V4HI
+ (ashiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (const_int 15))))]
+ "TARGET_SHMEDIA"
+ "mmulfx.w %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mmulfxrp_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V4HI
+ (ashiftrt:V4SI
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (const_int 16384))
+ (const_int 15))))]
+ "TARGET_SHMEDIA"
+ "mmulfxrp.w %1, %2, %0"
+ [(set_attr "type" "dmpy_media")
+ (set_attr "highpart" "depend")])
+
+
+(define_expand "mmulhi_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_operand" "")
+ (match_operand:V4HI 2 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul23_wl : gen_mmul01_wl)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "mmullo_wl"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_operand" "")
+ (match_operand:V4HI 2 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mmul01_wl : gen_mmul23_wl)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mmul23_wl"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mmulhi.wl %1, %2, %0"
+ : "mmullo.wl %1, %2, %0");
+}
+ [(set_attr "type" "dmpy_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mmul01_wl"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (mult:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (sign_extend:V4SI (match_operand:V4HI 2 "arith_reg_operand" "r")))
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mmullo.wl %1, %2, %0"
+ : "mmulhi.wl %1, %2, %0");
+}
+ [(set_attr "type" "dmpy_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
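+
+;; The hi/lo naming swaps with endianness because the element numbers in the
+;; vec_select are fixed in the RTL while the register half they occupy is
+;; not: elements 2 and 3 sit in the most significant half of the register on
+;; little-endian targets and in the least significant half on big-endian
+;; targets.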
+
+
+(define_expand "mmulsum_wq"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_operand" "")
+ (match_operand:V4HI 2 "arith_reg_operand" "")
+ (match_operand:DI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_mmulsum_wq_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mmulsum_wq_i"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (plus:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (plus:DI
+ (plus:DI
+ (vec_select:DI
+ (mult:V4DI
+ (sign_extend:V4DI (match_operand:V4HI 2 "arith_reg_operand" "r"))
+ (sign_extend:V4DI (match_operand:V4HI 3 "arith_reg_operand" "r")))
+ (parallel [(const_int 0)]))
+ (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2))
+ (sign_extend:V4DI (match_dup 3)))
+ (parallel [(const_int 1)])))
+ (plus:DI
+ (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2))
+ (sign_extend:V4DI (match_dup 3)))
+ (parallel [(const_int 2)]))
+ (vec_select:DI (mult:V4DI (sign_extend:V4DI (match_dup 2))
+ (sign_extend:V4DI (match_dup 3)))
+ (parallel [(const_int 3)]))))))]
+ "TARGET_SHMEDIA"
+ "mmulsum.wq %2, %3, %0"
+ [(set_attr "type" "mac_media")])
+
+(define_expand "mperm_w"
+ [(match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:QI 2 "extend_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mperm_w_little : gen_mperm_w_big)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+; This use of vec_select isn't exactly correct according to rtl.texi
+; (because the selector elements are not constants), but it seems a
+; straightforward extension.
+(define_insn "mperm_w_little"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (parallel
+ [(zero_extract:QI (match_operand:QI 2 "extend_reg_or_0_operand" "rZ")
+ (const_int 2) (const_int 0))
+ (zero_extract:QI (match_dup 2) (const_int 2) (const_int 2))
+ (zero_extract:QI (match_dup 2) (const_int 2) (const_int 4))
+ (zero_extract:QI (match_dup 2) (const_int 2) (const_int 6))])))]
+ "TARGET_SHMEDIA && TARGET_LITTLE_ENDIAN"
+ "mperm.w %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
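+
+;; Worked example of the selector encoding matched here (little-endian):
+;; each 2-bit field of operand 2 picks one 16-bit lane of operand 1, lowest
+;; field first.  A control value of 0xe4 (binary 11 10 01 00) is therefore
+;; the identity permutation, and 0x1b (binary 00 01 10 11) reverses the four
+;; lanes.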
+
+(define_insn "mperm_w_big"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (parallel
+ [(zero_extract:QI (not:QI (match_operand:QI 2
+ "extend_reg_or_0_operand" "rZ"))
+ (const_int 2) (const_int 0))
+ (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 2))
+ (zero_extract:QI (not:QI (match_dup 2)) (const_int 2) (const_int 4))
+ (zero_extract:QI (not:QI (match_dup 2))
+ (const_int 2) (const_int 6))])))]
+ "TARGET_SHMEDIA && TARGET_BIG_ENDIAN"
+ "mperm.w %1, %N2, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "mperm_w0"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_duplicate:V4HI (truncate:HI (match_operand 1
+ "trunc_hi_operand" "r"))))]
+ "TARGET_SHMEDIA"
+ "mperm.w %1, r63, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_expand "msad_ubq"
+ [(match_operand:DI 0 "arith_reg_dest" "")
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "")
+ (match_operand:DI 3 "arith_reg_operand" "")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn (gen_msad_ubq_i (operands[0], operands[3],
+ operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "msad_ubq_i"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (plus:DI
+ (plus:DI
+ (plus:DI
+ (plus:DI
+ (match_operand:DI 1 "arith_reg_operand" "0")
+ (abs:DI (vec_select:DI
+ (minus:V8DI
+ (zero_extend:V8DI
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))
+ (zero_extend:V8DI
+ (match_operand:V8QI 3 "arith_reg_or_0_operand" "rZ")))
+ (parallel [(const_int 0)]))))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 1)]))))
+ (plus:DI
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 2)])))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 3)])))))
+ (plus:DI
+ (plus:DI
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 4)])))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 5)]))))
+ (plus:DI
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 6)])))
+ (abs:DI (vec_select:DI (minus:V8DI (zero_extend:V8DI (match_dup 2))
+ (zero_extend:V8DI (match_dup 3)))
+ (parallel [(const_int 7)])))))))]
+ "TARGET_SHMEDIA"
+ "msad.ubq %N2, %N3, %0"
+ [(set_attr "type" "mac_media")])
+
+(define_insn "mshalds_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V2SI
+ (ashift:V2DI
+ (sign_extend:V2DI (match_operand:V2SI 1 "arith_reg_operand" "r"))
+ (and:DI (match_operand:DI 2 "arith_reg_operand" "r")
+ (const_int 31)))))]
+ "TARGET_SHMEDIA"
+ "mshalds.l %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mshalds_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:V4HI
+ (ashift:V4SI
+ (sign_extend:V4SI (match_operand:V4HI 1 "arith_reg_operand" "r"))
+ (and:DI (match_operand:DI 2 "arith_reg_operand" "r")
+ (const_int 15)))))]
+ "TARGET_SHMEDIA"
+ "mshalds.w %1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ashrv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ashiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshard.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ashrv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ashiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshard.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "mshards_q"
+ [(set (match_operand:HI 0 "arith_reg_dest" "=r")
+ (ss_truncate:HI
+ (ashiftrt:DI (match_operand:DI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "arith_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mshards.q %1, %N2, %0"
+ [(set_attr "type" "mcmp_media")])
+
+(define_expand "mshfhi_b"
+ [(match_operand:V8QI 0 "arith_reg_dest" "")
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_b : gen_mshf0_b)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "mshflo_b"
+ [(match_operand:V8QI 0 "arith_reg_dest" "")
+ (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_b : gen_mshf4_b)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mshf4_b"
+ [(set
+ (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13)
+ (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mshfhi.b %N1, %N2, %0"
+ : "mshflo.b %N1, %N2, %0");
+}
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshf0_b"
+ [(set
+ (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_select:V8QI
+ (vec_concat:V16QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9)
+ (const_int 2) (const_int 10) (const_int 3) (const_int 11)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mshflo.b %N1, %N2, %0"
+ : "mshfhi.b %N1, %N2, %0");
+}
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_expand "mshfhi_l"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_l : gen_mshf0_l)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "mshflo_l"
+ [(match_operand:V2SI 0 "arith_reg_dest" "")
+ (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_l : gen_mshf4_l)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mshf4_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 1) (const_int 3)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mshfhi.l %N1, %N2, %0"
+ : "mshflo.l %N1, %N2, %0");
+}
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshf0_l"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (vec_select:V2SI
+ (vec_concat:V4SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 0) (const_int 2)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mshflo.l %N1, %N2, %0"
+ : "mshfhi.l %N1, %N2, %0");
+}
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_expand "mshfhi_w"
+ [(match_operand:V4HI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf4_w : gen_mshf0_w)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_expand "mshflo_w"
+ [(match_operand:V4HI 0 "arith_reg_dest" "")
+ (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ")]
+ "TARGET_SHMEDIA"
+{
+ emit_insn ((TARGET_LITTLE_ENDIAN ? gen_mshf0_w : gen_mshf4_w)
+ (operands[0], operands[1], operands[2]));
+ DONE;
+})
+
+(define_insn "mshf4_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mshfhi.w %N1, %N2, %0"
+ : "mshflo.w %N1, %N2, %0");
+}
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "big") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshf0_w"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (vec_concat:V8HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))]
+ "TARGET_SHMEDIA"
+{
+ return (TARGET_LITTLE_ENDIAN
+ ? "mshflo.w %N1, %N2, %0"
+ : "mshfhi.w %N1, %N2, %0");
+}
+ [(set_attr "type" "arith_media")
+ (set (attr "highpart")
+ (cond [(eq_attr "endian" "little") (const_string "ignore")]
+ (const_string "user")))])
+
+(define_insn "mshflo_w_x"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (vec_select:V4HI
+ (vec_concat:V4HI (match_operand:V2HI 1 "extend_reg_or_0_operand" "rZ")
+ (match_operand:V2HI 2 "extend_reg_or_0_operand" "rZ"))
+ (parallel [(const_int 2) (const_int 0) (const_int 3) (const_int 1)])))]
+ "TARGET_SHMEDIA"
+ "mshflo.w %N1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; These are useful to expand ANDs and as combiner patterns.
+(define_insn_and_split "mshfhi_l_di"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r,f")
+ (ior:DI (lshiftrt:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ,f")
+ (const_int 32))
+ (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ,?f")
+ (const_int -4294967296))))]
+ "TARGET_SHMEDIA"
+ "@
+ mshfhi.l %N1, %N2, %0
+ #"
+ "TARGET_SHMEDIA && reload_completed
+ && ! GENERAL_REGISTER_P (true_regnum (operands[0]))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+{
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[4] = gen_highpart (SImode, operands[1]);
+ operands[5] = gen_highpart (SImode, operands[0]);
+ operands[6] = gen_highpart (SImode, operands[2]);
+}
+ [(set_attr "type" "arith_media")])
+
+(define_insn "*mshfhi_l_di_rev"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int -4294967296))
+ (lshiftrt:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))))]
+ "TARGET_SHMEDIA"
+ "mshfhi.l %N2, %N1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_split
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ior:DI (zero_extend:DI (match_operand:SI 1
+ "extend_reg_or_0_operand" ""))
+ (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "")
+ (const_int -4294967296))))
+ (clobber (match_operand:DI 3 "arith_reg_dest" ""))]
+ "TARGET_SHMEDIA"
+ [(const_int 0)]
+{
+ emit_insn (gen_ashldi3_media (operands[3],
+ simplify_gen_subreg (DImode, operands[1],
+ SImode, 0),
+ GEN_INT (32)));
+ emit_insn (gen_mshfhi_l_di (operands[0], operands[3], operands[2]));
+ DONE;
+})
+
+(define_insn "mshflo_l_di"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (and:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int 4294967295))
+ (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))))]
+  "TARGET_SHMEDIA"
+ "mshflo.l %N1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
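+
+;; In scalar terms the DImode forms above compute
+;;	mshfhi.l: (op1 >> 32) | (op2 & 0xffffffff00000000)
+;;	mshflo.l: (op1 & 0xffffffff) | (op2 << 32)
+;; i.e. they pack the high (resp. low) 32-bit halves of the two sources into
+;; one 64-bit register, which is what makes them handy for expanding ANDs
+;; with such masks.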
+
+(define_insn "*mshflo_l_di_rev"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))
+ (and:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 4294967295))))]
+  "TARGET_SHMEDIA"
+ "mshflo.l %N2, %N1, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+;; Combiner pattern for trampoline initialization.
+(define_insn_and_split "*double_shori"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0")
+ (const_int 32))
+ (match_operand:DI 2 "const_int_operand" "n")))]
+ "TARGET_SHMEDIA
+ && ! (INTVAL (operands[2]) & ~(unsigned HOST_WIDE_INT) 0xffffffffUL)"
+ "#"
+ "rtx_equal_p (operands[0], operands[1])"
+ [(const_int 0)]
+{
+ HOST_WIDE_INT v = INTVAL (operands[2]);
+
+ emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v >> 16)));
+ emit_insn (gen_shori_media (operands[0], operands[0], GEN_INT (v & 65535)));
+ DONE;
+}
+ [(set_attr "highpart" "ignore")])
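+
+;; Sanity sketch for the split above, assuming shori_media behaves as
+;; Rd = (Rd << 16) | imm16: starting from operand 1 in the destination, the
+;; two shori steps with the upper and lower 16 bits of the constant yield
+;; (op1 << 32) | op2, matching the pattern being split.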
+
+(define_insn "*mshflo_l_di_x"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (zero_extend:DI (match_operand:SI 1 "extend_reg_or_0_operand"
+ "rZ"))
+ (ashift:DI (match_operand:DI 2 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))))]
+ "TARGET_SHMEDIA"
+ "mshflo.l %N1, %N2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn_and_split "concat_v2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=r,f,f?")
+;; (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,0,f")
+ (vec_concat:V2SF (match_operand:SF 1 "register_operand" "rZ,f,f")
+ (match_operand:SF 2 "register_operand" "rZ,f,f")))]
+ "TARGET_SHMEDIA"
+ "@
+ mshflo.l %N1, %N2, %0
+ #
+ #"
+ "TARGET_SHMEDIA && reload_completed
+ && ! GENERAL_REGISTER_P (true_regnum (operands[0]))"
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 4) (match_dup 2))]
+{
+ operands[3] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 0);
+ operands[4] = simplify_gen_subreg (SFmode, operands[0], V2SFmode, 4);
+}
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "*mshflo_l_di_x_rev"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (ior:DI (ashift:DI (match_operand:DI 1 "arith_reg_or_0_operand" "rZ")
+ (const_int 32))
+ (zero_extend:DI
+ (match_operand:SI 2 "extend_reg_or_0_operand" "rZ"))))]
+ "TARGET_SHMEDIA"
+ "mshflo.l %N2, %N1, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "ignore")])
+
+(define_insn "ashlv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ashift:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlld.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_split
+ [(set (match_operand 0 "any_register_operand" "")
+ (match_operator 3 "shift_operator"
+ [(match_operand 1 "any_register_operand" "")
+ (match_operand 2 "shift_count_reg_operand" "")]))]
+ "TARGET_SHMEDIA && ! register_operand (operands[2], VOIDmode)"
+ [(set (match_dup 0) (match_dup 3))]
+{
+ rtx count = operands[2];
+ enum machine_mode outer_mode = GET_MODE (operands[2]), inner_mode;
+
+ while (GET_CODE (count) == ZERO_EXTEND || GET_CODE (count) == SIGN_EXTEND
+ || (GET_CODE (count) == SUBREG && SUBREG_BYTE (count) == 0)
+ || GET_CODE (count) == TRUNCATE)
+ count = XEXP (count, 0);
+ inner_mode = GET_MODE (count);
+ count = simplify_gen_subreg (outer_mode, count, inner_mode,
+ subreg_lowpart_offset (outer_mode, inner_mode));
+ operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), GET_MODE (operands[3]),
+ operands[1], count);
+})
+
+(define_insn "ashlv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ashift:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlld.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "lshrv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (lshiftrt:V2SI (match_operand:V2SI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlrd.l %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "lshrv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (lshiftrt:V4HI (match_operand:V4HI 1 "arith_reg_operand" "r")
+ (match_operand:DI 2 "shift_count_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "mshlrd.w %1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "subv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msub.l %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "subv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msub.w %N1, %2, %0"
+ [(set_attr "type" "arith_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn_and_split "subv2hi3"
+ [(set (match_operand:V2HI 0 "arith_reg_dest" "=r")
+ (minus:V2HI (match_operand:V2HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "#"
+ "TARGET_SHMEDIA"
+ [(const_int 0)]
+{
+ rtx src0 = simplify_gen_subreg (V4HImode, operands[1], V2HImode, 0);
+ rtx src1 = simplify_gen_subreg (V4HImode, operands[2], V2HImode, 0);
+ rtx v4hi_dst = simplify_gen_subreg (V4HImode, operands[0], V2HImode, 0);
+ rtx di_dst = simplify_gen_subreg (DImode, operands[0], V2HImode, 0);
+ rtx si_dst = simplify_gen_subreg (SImode, operands[0], V2HImode, 0);
+
+ emit_insn (gen_subv4hi3 (v4hi_dst, src0, src1));
+ emit_insn (gen_truncdisi2 (si_dst, di_dst));
+ DONE;
+}
+ [(set_attr "highpart" "must_split")])
+
+(define_insn "sssubv2si3"
+ [(set (match_operand:V2SI 0 "arith_reg_dest" "=r")
+ (ss_minus:V2SI (match_operand:V2SI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V2SI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msubs.l %N1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "ussubv8qi3"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (us_minus:V8QI (match_operand:V8QI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V8QI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msubs.ub %N1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+(define_insn "sssubv4hi3"
+ [(set (match_operand:V4HI 0 "arith_reg_dest" "=r")
+ (ss_minus:V4HI (match_operand:V4HI 1 "arith_reg_or_0_operand" "rZ")
+ (match_operand:V4HI 2 "arith_reg_operand" "r")))]
+ "TARGET_SHMEDIA"
+ "msubs.w %N1, %2, %0"
+ [(set_attr "type" "mcmp_media")
+ (set_attr "highpart" "depend")])
+
+;; -------------------------------------------------------------------------
+;; Floating Point Intrinsics
+;; -------------------------------------------------------------------------
+
+(define_insn "fcosa_s"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")]
+ UNSPEC_FCOSA))]
+ "TARGET_SHMEDIA"
+ "fcosa.s %1, %0"
+ [(set_attr "type" "atrans_media")])
+
+(define_insn "fsina_s"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (unspec:SF [(match_operand:SI 1 "fp_arith_reg_operand" "f")]
+ UNSPEC_FSINA))]
+ "TARGET_SHMEDIA"
+ "fsina.s %1, %0"
+ [(set_attr "type" "atrans_media")])
+
+(define_insn "fipr"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (plus:SF (vec_select:SF (mult:V4SF (match_operand:V4SF 1
+ "fp_arith_reg_operand" "f")
+ (match_operand:V4SF 2
+ "fp_arith_reg_operand" "f"))
+ (parallel [(const_int 0)]))
+ (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2))
+ (parallel [(const_int 1)])))
+ (plus:SF (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2))
+ (parallel [(const_int 2)]))
+ (vec_select:SF (mult:V4SF (match_dup 1) (match_dup 2))
+ (parallel [(const_int 3)])))))]
+ "TARGET_SHMEDIA"
+ "fipr.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "fsrra_s"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (unspec:SF [(match_operand:SF 1 "fp_arith_reg_operand" "f")]
+ UNSPEC_FSRRA))]
+ "TARGET_SHMEDIA"
+ "fsrra.s %1, %0"
+ [(set_attr "type" "atrans_media")])
+
+(define_insn "ftrv"
+ [(set (match_operand:V4SF 0 "fp_arith_reg_operand" "=f")
+ (plus:V4SF
+ (plus:V4SF
+ (mult:V4SF
+ (vec_select:V4SF (match_operand:V16SF 1 "fp_arith_reg_operand" "f")
+ (parallel [(const_int 0) (const_int 5)
+ (const_int 10) (const_int 15)]))
+ (match_operand:V4SF 2 "fp_arith_reg_operand" "f"))
+ (mult:V4SF
+ (vec_select:V4SF (match_dup 1)
+ (parallel [(const_int 4) (const_int 9)
+ (const_int 14) (const_int 3)]))
+ (vec_select:V4SF (match_dup 2)
+ (parallel [(const_int 1) (const_int 2)
+ (const_int 3) (const_int 0)]))))
+ (plus:V4SF
+ (mult:V4SF
+ (vec_select:V4SF (match_dup 1)
+ (parallel [(const_int 8) (const_int 13)
+ (const_int 2) (const_int 7)]))
+ (vec_select:V4SF (match_dup 2)
+ (parallel [(const_int 2) (const_int 3)
+ (const_int 0) (const_int 1)])))
+ (mult:V4SF
+ (vec_select:V4SF (match_dup 1)
+ (parallel [(const_int 12) (const_int 1)
+ (const_int 6) (const_int 11)]))
+ (vec_select:V4SF (match_dup 2)
+ (parallel [(const_int 3) (const_int 0)
+ (const_int 1) (const_int 2)]))))))]
+ "TARGET_SHMEDIA"
+ "ftrv.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
+(define_insn "ldhi_l"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:SI (and:SI (match_dup 1) (const_int 3)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32"
+ "ldhi.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldhi_q"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 1) (const_int 7)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32"
+ "ldhi.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn_and_split "*ldhi_q_comb0"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 1) (const_int 7))
+ (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_ldhi_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;
+})
+
+(define_insn_and_split "*ldhi_q_comb1"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (plus:SI (match_dup 1)
+ (match_operand:SI 3 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8)
+ && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_ldhi_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;
+})
+
+(define_insn "ldlo_l"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (and:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:SI (const_int 4) (and:SI (match_dup 1) (const_int 3)))
+ (and:SI (match_dup 1) (const_int 3))))]
+ "TARGET_SHMEDIA32"
+ "ldlo.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldlo_q"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:SI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7)))
+ (and:SI (match_dup 1) (const_int 7))))]
+ "TARGET_SHMEDIA32"
+ "ldlo.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn_and_split "*ldlo_q_comb0"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 1) (const_int 7)))
+ (and:SI (match_dup 1) (const_int 7))))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_ldlo_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;
+})
+
+(define_insn_and_split "*ldlo_q_comb1"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8)
+ (and:SI (plus:SI (match_dup 1)
+ (match_operand:SI 3 "ua_offset" "I06"))
+ (const_int 7)))
+ (and:SI (plus:SI (match_dup 1) (match_dup 3)) (const_int 7))))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[2]) & -8)
+ && (INTVAL (operands[2]) & 7) == INTVAL (operands[3])"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_ldlo_q (operands[0],
+ gen_rtx_PLUS (SImode, operands[1], operands[2])));
+ DONE;
+})
+
+(define_insn "sthi_l"
+ [(set (zero_extract:SI
+ (mem:SI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:SI (and:SI (match_dup 0) (const_int 3)) (const_int 1))
+ (const_int 0))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "sthi.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+;; All unaligned stores are considered to be 'narrow' because they typically
+;; operate on less than a quadword, and when they operate on a full quadword,
+;; the vanilla store high / store low sequence will cause a stall if not
+;; scheduled apart.
+(define_insn "sthi_q"
+ [(set (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1))
+ (const_int 0))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "sthi.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn_and_split "*sthi_q_comb0"
+ [(set (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (match_dup 0) (const_int 7)) (const_int 1))
+ (const_int 0))
+ (match_operand:DI 2 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*sthi_q_comb1"
+ [(set (zero_extract:DI
+ (mem:DI (plus:SI (ior:SI (plus:SI
+ (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int -7)))
+ (plus:SI (and:SI (plus:SI (match_dup 0)
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int 7))
+ (const_int 1))
+ (const_int 0))
+ (match_operand:DI 3 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & -8)
+ && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_sthi_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[3]));
+ DONE;
+})
+
+;; This is a highpart use because the address is used as a full 64-bit value.
+(define_insn "stlo_l"
+ [(set (zero_extract:SI
+ (mem:SI (and:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:SI (const_int 4) (and:SI (match_dup 0) (const_int 3)))
+ (and:SI (match_dup 0) (const_int 3)))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "stlo.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "stlo_q"
+ [(set (zero_extract:DI
+ (mem:DI (and:SI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7)))
+ (and:SI (match_dup 0) (const_int 7)))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32"
+ "stlo.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn_and_split "*stlo_q_comb0"
+ [(set (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8) (and:SI (match_dup 0) (const_int 7)))
+ (and:SI (match_dup 0) (const_int 7)))
+ (match_operand:DI 2 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == 0"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*stlo_q_comb1"
+ [(set (zero_extract:DI
+ (mem:DI (and:SI (plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "ua_offset" "I06"))
+ (const_int -8)))
+ (minus:SI (const_int 8)
+ (and:SI (plus:SI (match_dup 0)
+ (match_operand:SI 2 "ua_offset" "I06"))
+ (const_int 7)))
+ (and:SI (plus:SI (match_dup 0) (match_dup 2)) (const_int 7)))
+ (match_operand:DI 3 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA32 && (INTVAL (operands[1]) & 7) == INTVAL (operands[2])"
+ "#"
+ ""
+ [(pc)]
+{
+ emit_insn (gen_stlo_q (gen_rtx_PLUS (SImode, operands[0], operands[1]),
+ operands[3]));
+ DONE;
+})
+
+(define_insn "ldhi_l64"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:DI (and:DI (match_dup 1) (const_int 3)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA64"
+ "ldhi.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldhi_q64"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (plus:DI (ior:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:DI (and:DI (match_dup 1) (const_int 7)) (const_int 1))
+ (const_int 0)))]
+ "TARGET_SHMEDIA64"
+ "ldhi.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldlo_l64"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extract:SI
+ (mem:SI (and:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:DI (const_int 4) (and:DI (match_dup 1) (const_int 3)))
+ (and:DI (match_dup 1) (const_int 3))))]
+ "TARGET_SHMEDIA64"
+ "ldlo.l %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "ldlo_q64"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extract:DI
+ (mem:DI (and:DI (match_operand:QI 1 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:DI (const_int 8) (and:DI (match_dup 1) (const_int 7)))
+ (and:DI (match_dup 1) (const_int 7))))]
+ "TARGET_SHMEDIA64"
+ "ldlo.q %U1, %0"
+ [(set_attr "type" "load_media")])
+
+(define_insn "sthi_l64"
+ [(set (zero_extract:SI
+ (mem:SI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 3))
+ (const_int -3)))
+ (plus:DI (and:DI (match_dup 0) (const_int 3)) (const_int 1))
+ (const_int 0))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "sthi.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "sthi_q64"
+ [(set (zero_extract:DI
+ (mem:DI (plus:DI (ior:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int 7))
+ (const_int -7)))
+ (plus:DI (and:DI (match_dup 0) (const_int 7)) (const_int 1))
+ (const_int 0))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "sthi.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "stlo_l64"
+ [(set (zero_extract:SI
+ (mem:SI (and:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -4)))
+ (minus:DI (const_int 4) (and:DI (match_dup 0) (const_int 3)))
+ (and:DI (match_dup 0) (const_int 3)))
+ (match_operand:SI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "stlo.l %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "stlo_q64"
+ [(set (zero_extract:DI
+ (mem:DI (and:DI (match_operand:QI 0 "ua_address_operand" "p")
+ (const_int -8)))
+ (minus:DI (const_int 8) (and:DI (match_dup 0) (const_int 7)))
+ (and:DI (match_dup 0) (const_int 7)))
+ (match_operand:DI 1 "arith_reg_operand" "r"))]
+ "TARGET_SHMEDIA64"
+ "stlo.q %U0, %1"
+ [(set_attr "type" "ustore_media")])
+
+(define_insn "nsb"
+ [(set (match_operand:QI 0 "arith_reg_dest" "=r")
+ (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")]
+ UNSPEC_NSB))]
+ "TARGET_SHMEDIA"
+ "nsb %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "nsbsi"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+ (zero_extend:SI
+ (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")]
+ UNSPEC_NSB)))]
+ "TARGET_SHMEDIA"
+ "nsb %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_insn "nsbdi"
+ [(set (match_operand:DI 0 "arith_reg_dest" "=r")
+ (zero_extend:DI
+ (unspec:QI [(match_operand:DI 1 "arith_reg_operand" "r")]
+ UNSPEC_NSB)))]
+ "TARGET_SHMEDIA"
+ "nsb %1, %0"
+ [(set_attr "type" "arith_media")])
+
+(define_expand "ffsdi2"
+ [(set (match_operand:DI 0 "arith_reg_dest" "")
+ (ffs:DI (match_operand:DI 1 "arith_reg_operand" "")))]
+ "TARGET_SHMEDIA"
+{
+ rtx scratch = gen_reg_rtx (DImode);
+ rtx last;
+
+ emit_insn (gen_adddi3 (scratch, operands[1], constm1_rtx));
+ emit_insn (gen_xordi3 (scratch, operands[1], scratch));
+ emit_insn (gen_lshrdi3_media (scratch, scratch, const1_rtx));
+ emit_insn (gen_nsbdi (scratch, scratch));
+ emit_insn (gen_adddi3 (scratch, scratch, GEN_INT (-64)));
+ emit_insn (gen_movdicc_false (scratch, operands[1], const0_rtx, scratch));
+ last = emit_insn (gen_subdi3 (operands[0], const0_rtx, scratch));
+ set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (DImode, operands[0]));
+
+ DONE;
+})
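+
+;; A rough C-level sketch of the value the ffsdi2 expansion above computes.
+;; This is only an illustration: nsb_model is a hypothetical stand-in for the
+;; SHmedia nsb instruction, modelled here as counting how many bits, from bit
+;; 62 downward, still match the sign bit (stdint.h types assumed).
+;;
+;;   static int nsb_model (int64_t v)
+;;   {
+;;     int n = 0;
+;;     for (int i = 62; i >= 0 && ((v >> i) & 1) == ((v >> 63) & 1); i--)
+;;       n++;
+;;     return n;
+;;   }
+;;
+;;   static int ffs64_sketch (int64_t x)
+;;   {
+;;     int64_t s = (int64_t) (((uint64_t) x - 1) ^ (uint64_t) x);
+;;                                           /* adddi3 + xordi3: ones up to
+;;                                              the lowest set bit */
+;;     s = (int64_t) ((uint64_t) s >> 1);    /* lshrdi3: 2^(ffs-1) - 1 */
+;;     int64_t t = nsb_model (s) - 64;       /* nsbdi + adddi3: -ffs (x) */
+;;     if (x == 0)                           /* movdicc_false */
+;;       t = 0;
+;;     return (int) (0 - t);                 /* final subdi3 */
+;;   }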
+
+(define_expand "ffssi2"
+ [(set (match_operand:SI 0 "arith_reg_dest" "")
+ (ffs:SI (match_operand:SI 1 "arith_reg_operand" "")))]
+ "TARGET_SHMEDIA"
+{
+ rtx scratch = gen_reg_rtx (SImode);
+ rtx discratch = gen_reg_rtx (DImode);
+ rtx last;
+
+ emit_insn (gen_adddi3 (discratch,
+ simplify_gen_subreg (DImode, operands[1], SImode, 0),
+ constm1_rtx));
+ emit_insn (gen_andcdi3 (discratch,
+ simplify_gen_subreg (DImode, operands[1], SImode, 0),
+ discratch));
+ emit_insn (gen_nsbsi (scratch, discratch));
+ last = emit_insn (gen_subsi3 (operands[0],
+ force_reg (SImode, GEN_INT (63)), scratch));
+ set_unique_reg_note (last, REG_EQUAL, gen_rtx_FFS (SImode, operands[0]));
+
+ DONE;
+})
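+
+;; Likewise a sketch of the 32-bit expansion above (same caveats, and reusing
+;; the hypothetical nsb_model helper): andcdi3 isolates the lowest set bit as
+;; x & ~(x - 1), and then ffs (x) = 63 - nsb of that single-bit value.
+;;
+;;   static int ffs32_sketch (int32_t x)
+;;   {
+;;     int64_t w = x;                        /* DImode view of the SImode input */
+;;     int64_t low = w & ~(w - 1);           /* adddi3 + andcdi3 */
+;;     return 63 - nsb_model (low);          /* nsbsi + subsi3 against 63 */
+;;   }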
+
+(define_insn "byterev"
+ [(set (match_operand:V8QI 0 "arith_reg_dest" "=r")
+ (vec_select:V8QI (match_operand:V8QI 1 "arith_reg_operand" "r")
+ (parallel [(const_int 7) (const_int 6) (const_int 5)
+ (const_int 4) (const_int 3) (const_int 2)
+ (const_int 1) (const_int 0)])))]
+ "TARGET_SHMEDIA"
+ "byterev %1, %0"
+ [(set_attr "type" "arith_media")])
+
+;; In user mode, the "pref" instruction will raise a RADDERR exception
+;; for accesses to [0x80000000,0xffffffff]. This makes it an unsuitable
+;; implementation of __builtin_prefetch for VxWorks RTPs.
+(define_expand "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "")
+ (match_operand:SI 1 "const_int_operand" "")
+ (match_operand:SI 2 "const_int_operand" ""))]
+ "(TARGET_SH2A || TARGET_SH3 || TARGET_SH5)
+ && (TARGET_SHMEDIA || ! TARGET_VXWORKS_RTP)")
+
+(define_insn "*prefetch"
+ [(prefetch (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "(TARGET_SH2A || TARGET_SH3 || TARGET_SHCOMPACT) && ! TARGET_VXWORKS_RTP"
+ "pref @%0"
+ [(set_attr "type" "other")])
+
+(define_insn "*prefetch_media"
+ [(prefetch (match_operand:QI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_SHMEDIA"
+{
+ operands[0] = gen_rtx_MEM (QImode, operands[0]);
+ output_asm_insn ("ld%M0.b %m0,r63", operands);
+ return "";
+}
+ [(set_attr "type" "other")])
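+
+;; For reference, a source-level use of the prefetch expander above is just
+;; the generic GCC builtin; on SHmedia the pattern prefetches by loading the
+;; byte into r63, the always-zero register, so the load has no architectural
+;; effect.
+;;
+;;   void touch (const char *p)
+;;   {
+;;     __builtin_prefetch (p, 0, 3);   /* address, read (0), high locality */
+;;   }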
+
+(define_insn "alloco_i"
+ [(set (mem:BLK (match_operand:QI 0 "cache_address_operand" "p"))
+ (unspec:BLK [(const_int 0)] UNSPEC_ALLOCO))]
+ "TARGET_SHMEDIA32"
+{
+ rtx xops[2];
+
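+  /* operands[0] is a cache_address_operand: either a plain register or a
+     (plus reg offset) address.  Split it into the base and offset operands
+     of the alloco mnemonic below.  */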
+ if (GET_CODE (operands[0]) == PLUS)
+ {
+ xops[0] = XEXP (operands[0], 0);
+ xops[1] = XEXP (operands[0], 1);
+ }
+ else
+ {
+ xops[0] = operands[0];
+ xops[1] = const0_rtx;
+ }
+ output_asm_insn ("alloco %0, %1", xops);
+ return "";
+}
+ [(set_attr "type" "other")])
+
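+;; Give shmedia_cleanup_truncate (see sh.c) a chance to drop truncations that
+;; have become redundant in the source of a register move after reload; if
+;; nothing was changed, the split FAILs and the original insn is kept.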
+(define_split
+ [(set (match_operand 0 "any_register_operand" "")
+ (match_operand 1 "" ""))]
+ "TARGET_SHMEDIA && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ int n_changes = 0;
+
+ for_each_rtx (&operands[1], shmedia_cleanup_truncate, &n_changes);
+ if (!n_changes)
+ FAIL;
+})
+
+;; -------------------------------------------------------------------------
+;; Stack Protector Patterns
+;; -------------------------------------------------------------------------
+
+(define_expand "stack_protect_set"
+ [(set (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ if (TARGET_SHMEDIA64)
+ emit_insn (gen_stack_protect_set_di_media (operands[0], operands[1]));
+ else
+ emit_insn (gen_stack_protect_set_si_media (operands[0], operands[1]));
+ }
+ else
+ emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+
+ DONE;
+})
+
+(define_insn "stack_protect_set_si"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ "!TARGET_SHMEDIA"
+{
+ return "mov.l %1,%2" "\n"
+ " mov.l %2,%0" "\n"
+ " mov #0,%2";
+}
+ [(set_attr "type" "other")
+ (set_attr "length" "6")])
+
+(define_insn "stack_protect_set_si_media"
+ [(set (match_operand:SI 0 "memory_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA"
+{
+ return "ld%M1.l %m1,%2" "\n"
+ " st%M0.l %m0,%2" "\n"
+ " movi 0,%2";
+}
+ [(set_attr "type" "other")
+ (set_attr "length" "12")])
+
+(define_insn "stack_protect_set_di_media"
+ [(set (match_operand:DI 0 "memory_operand" "=m")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+ (set (match_scratch:DI 2 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA64"
+{
+ return "ld%M1.q %m1,%2" "\n"
+ " st%M0.q %m0,%2" "\n"
+ " movi 0,%2";
+}
+ [(set_attr "type" "other")
+ (set_attr "length" "12")])
+
+(define_expand "stack_protect_test"
+ [(match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" "")
+ (match_operand 2 "" "")]
+ ""
+{
+ if (TARGET_SHMEDIA)
+ {
+ rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
+ rtx test;
+
+ test = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
+ if (TARGET_SHMEDIA64)
+ {
+ emit_insn (gen_stack_protect_test_di_media (tmp, operands[0],
+ operands[1]));
+ emit_jump_insn (gen_cbranchdi4 (test, tmp, const0_rtx, operands[2]));
+ }
+ else
+ {
+ emit_insn (gen_stack_protect_test_si_media (tmp, operands[0],
+ operands[1]));
+ emit_jump_insn (gen_cbranchsi4 (test, tmp, const0_rtx, operands[2]));
+ }
+ }
+ else
+ {
+ emit_insn (gen_stack_protect_test_si (operands[0], operands[1]));
+ emit_jump_insn (gen_branch_true (operands[2]));
+ }
+
+ DONE;
+})
+
+(define_insn "stack_protect_test_si"
+ [(set (reg:SI T_REG)
+ (unspec:SI [(match_operand:SI 0 "memory_operand" "m")
+ (match_operand:SI 1 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 2 "=&r") (const_int 0))
+ (set (match_scratch:SI 3 "=&r") (const_int 0))]
+ "!TARGET_SHMEDIA"
+{
+ return "mov.l %0,%2" "\n"
+ " mov.l %1,%3" "\n"
+ " cmp/eq %2,%3" "\n"
+ " mov #0,%2" "\n"
+ " mov #0,%3";
+}
+ [(set_attr "type" "other")
+ (set_attr "length" "10")])
+
+(define_insn "stack_protect_test_si_media"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+ (match_operand:SI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:SI 3 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA"
+{
+ return "ld%M1.l %m1,%0" "\n"
+ " ld%M2.l %m2,%3" "\n"
+ " cmpeq %0,%3,%0" "\n"
+ " movi 0,%3";
+}
+ [(set_attr "type" "other")
+ (set_attr "length" "16")])
+
+(define_insn "stack_protect_test_di_media"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (unspec:DI [(match_operand:DI 1 "memory_operand" "m")
+ (match_operand:DI 2 "memory_operand" "m")]
+ UNSPEC_SP_TEST))
+ (set (match_scratch:DI 3 "=&r") (const_int 0))]
+ "TARGET_SHMEDIA64"
+{
+ return "ld%M1.q %m1,%0" "\n"
+ " ld%M2.q %m2,%3" "\n"
+ " cmpeq %0,%3,%0" "\n"
+ " movi 0,%3";
+}
+ [(set_attr "type" "other")
+ (set_attr "length" "16")])
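+
+;; A C-level sketch of what the stack protector patterns above implement
+;; (illustration only; the frame_slot name is hypothetical, while
+;; __stack_chk_guard is the usual guard symbol referenced by the compiler).
+;;
+;;   extern unsigned long __stack_chk_guard;
+;;
+;;   void set_sketch (unsigned long *frame_slot)     /* stack_protect_set  */
+;;   {
+;;     unsigned long tmp = __stack_chk_guard;  /* load the guard value     */
+;;     *frame_slot = tmp;                       /* store it into the frame  */
+;;     tmp = 0;                                 /* and wipe the scratch reg */
+;;   }
+;;
+;;   int test_sketch (unsigned long *frame_slot)     /* stack_protect_test */
+;;   {
+;;     /* Compare the saved copy against the live guard; the expander then
+;;        branches to the pass label (operand 2) when they are equal.  */
+;;     return *frame_slot == __stack_chk_guard;
+;;   }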
+
+;; -------------------------------------------------------------------------
+;; Atomic operations
+;; -------------------------------------------------------------------------
+
+(include "sync.md")
diff --git a/gcc-4.9/gcc/config/sh/sh.opt b/gcc-4.9/gcc/config/sh/sh.opt
new file mode 100644
index 000000000..1834c6bde
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh.opt
@@ -0,0 +1,362 @@
+; Options for the SH port of the compiler.
+
+; Copyright (C) 2005-2014 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+;; Used for various architecture options.
+Mask(SH_E)
+
+;; Set if the default precision of the FPU is single.
+Mask(FPU_SINGLE)
+
+;; Set if a double-precision FPU is present but is restricted to
+;; single precision usage only.
+Mask(FPU_SINGLE_ONLY)
+
+;; Set if we should generate code using type 2A insns.
+Mask(HARD_SH2A)
+
+;; Set if we should generate code using type 2A DF insns.
+Mask(HARD_SH2A_DOUBLE)
+
+;; Set if compiling for SH4 hardware (to be used for insn costs etc.)
+Mask(HARD_SH4)
+
+;; Set if we should generate code for a SH5 CPU (either ISA).
+Mask(SH5)
+
+;; Set if we should save all target registers.
+Mask(SAVE_ALL_TARGET_REGS)
+
+m1
+Target RejectNegative Mask(SH1) Condition(SUPPORT_SH1)
+Generate SH1 code
+
+m2
+Target RejectNegative Mask(SH2) Condition(SUPPORT_SH2)
+Generate SH2 code
+
+m2a
+Target RejectNegative Condition(SUPPORT_SH2A)
+Generate default double-precision SH2a-FPU code
+
+m2a-nofpu
+Target RejectNegative Condition(SUPPORT_SH2A_NOFPU)
+Generate SH2a FPU-less code
+
+m2a-single
+Target RejectNegative Condition(SUPPORT_SH2A_SINGLE)
+Generate default single-precision SH2a-FPU code
+
+m2a-single-only
+Target RejectNegative Condition(SUPPORT_SH2A_SINGLE_ONLY)
+Generate only single-precision SH2a-FPU code
+
+m2e
+Target RejectNegative Condition(SUPPORT_SH2E)
+Generate SH2e code
+
+m3
+Target RejectNegative Mask(SH3) Condition(SUPPORT_SH3)
+Generate SH3 code
+
+m3e
+Target RejectNegative Condition(SUPPORT_SH3E)
+Generate SH3e code
+
+m4
+Target RejectNegative Mask(SH4) Condition(SUPPORT_SH4)
+Generate SH4 code
+
+m4-100
+Target RejectNegative Condition(SUPPORT_SH4)
+Generate SH4-100 code
+
+m4-200
+Target RejectNegative Condition(SUPPORT_SH4)
+Generate SH4-200 code
+
+;; TARGET_SH4_300 indicates if we have the ST40-300 instruction set and
+;; pipeline - irrespective of ABI.
+m4-300
+Target RejectNegative Condition(SUPPORT_SH4) Var(TARGET_SH4_300)
+Generate SH4-300 code
+
+m4-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4 FPU-less code
+
+m4-100-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4-100 FPU-less code
+
+m4-200-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate SH4-200 FPU-less code
+
+m4-300-nofpu
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300)
+Generate SH4-300 FPU-less code
+
+m4-340
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU) Var(TARGET_SH4_300)
+Generate code for SH4 340 series (MMU/FPU-less)
+;; passes -isa=sh4-nommu-nofpu to the assembler.
+
+m4-400
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate code for SH4 400 series (MMU/FPU-less)
+;; passes -isa=sh4-nommu-nofpu to the assembler.
+
+m4-500
+Target RejectNegative Condition(SUPPORT_SH4_NOFPU)
+Generate code for SH4 500 series (FPU-less).
+;; passes -isa=sh4-nofpu to the assembler.
+
+m4-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
+Generate default single-precision SH4 code
+
+m4-100-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
+Generate default single-precision SH4-100 code
+
+m4-200-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE)
+Generate default single-precision SH4-200 code
+
+m4-300-single
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE) Var(TARGET_SH4_300)
+Generate default single-precision SH4-300 code
+
+m4-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
+Generate only single-precision SH4 code
+
+m4-100-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
+Generate only single-precision SH4-100 code
+
+m4-200-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY)
+Generate only single-precision SH4-200 code
+
+m4-300-single-only
+Target RejectNegative Condition(SUPPORT_SH4_SINGLE_ONLY) Var(TARGET_SH4_300)
+Generate only single-precision SH4-300 code
+
+m4a
+Target RejectNegative Mask(SH4A) Condition(SUPPORT_SH4A)
+Generate SH4a code
+
+m4a-nofpu
+Target RejectNegative Condition(SUPPORT_SH4A_NOFPU)
+Generate SH4a FPU-less code
+
+m4a-single
+Target RejectNegative Condition(SUPPORT_SH4A_SINGLE)
+Generate default single-precision SH4a code
+
+m4a-single-only
+Target RejectNegative Condition(SUPPORT_SH4A_SINGLE_ONLY)
+Generate only single-precision SH4a code
+
+m4al
+Target RejectNegative Condition(SUPPORT_SH4AL)
+Generate SH4al-dsp code
+
+m5-32media
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA)
+Generate 32-bit SHmedia code
+
+m5-32media-nofpu
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA_NOFPU)
+Generate 32-bit FPU-less SHmedia code
+
+m5-64media
+Target RejectNegative Condition(SUPPORT_SH5_64MEDIA)
+Generate 64-bit SHmedia code
+
+m5-64media-nofpu
+Target RejectNegative Condition(SUPPORT_SH5_64MEDIA_NOFPU)
+Generate 64-bit FPU-less SHmedia code
+
+m5-compact
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA)
+Generate SHcompact code
+
+m5-compact-nofpu
+Target RejectNegative Condition(SUPPORT_SH5_32MEDIA_NOFPU)
+Generate FPU-less SHcompact code
+
+maccumulate-outgoing-args
+Target Report Var(TARGET_ACCUMULATE_OUTGOING_ARGS) Init(1)
+Reserve space for outgoing arguments in the function prologue
+
+madjust-unroll
+Target Ignore
+Does nothing. Preserved for backward compatibility.
+
+mb
+Target Report RejectNegative InverseMask(LITTLE_ENDIAN)
+Generate code in big endian mode
+
+mbigtable
+Target Report RejectNegative Mask(BIGTABLE)
+Generate 32-bit offsets in switch tables
+
+mbitops
+Target Report RejectNegative Mask(BITOPS)
+Generate bit instructions
+
+mbranch-cost=
+Target RejectNegative Joined UInteger Var(sh_branch_cost) Init(-1)
+Cost to assume for a branch insn
+
+mzdcbranch
+Target Var(TARGET_ZDCBRANCH)
+Assume that zero displacement conditional branches are fast
+
+mcbranchdi
+Target Undocumented Var(TARGET_CBRANCHDI4) Warn(%qs is deprecated and has no effect)
+Enable cbranchdi4 pattern
+
+mcmpeqdi
+Target Undocumented Var(TARGET_CMPEQDI_T) Warn(%qs is deprecated and has no effect)
+Emit cmpeqdi_t pattern even when -mcbranchdi is in effect.
+
+mcut2-workaround
+Target RejectNegative Var(TARGET_SH5_CUT2_WORKAROUND)
+Enable SH5 cut2 workaround
+
+mdalign
+Target Report RejectNegative Mask(ALIGN_DOUBLE)
+Align doubles at 64-bit boundaries
+
+mdiv=
+Target RejectNegative Joined Var(sh_div_str) Init("")
+Division strategy, one of: call, call2, fp, inv, inv:minlat, inv20u, inv20l, inv:call, inv:call2, inv:fp, call-div1, call-fp, call-table
+
+mdivsi3_libfunc=
+Target RejectNegative Joined Var(sh_divsi3_libfunc) Init("")
+Specify name for 32 bit signed division function
+
+mfmovd
+Target RejectNegative Mask(FMOVD)
+Enable the use of 64-bit floating point registers in fmov instructions. See -mdalign if 64-bit alignment is required.
+
+mfixed-range=
+Target RejectNegative Joined Var(sh_fixed_range_str)
+Specify range of registers to make fixed
+
+mgettrcost=
+Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1)
+Cost to assume for gettr insn
+
+mhitachi
+Target Report RejectNegative Mask(HITACHI)
+Follow Renesas (formerly Hitachi) / SuperH calling conventions
+
+mieee
+Target Var(TARGET_IEEE)
+Increase the IEEE compliance for floating-point comparisons
+
+mindexed-addressing
+Target Report Mask(ALLOW_INDEXED_ADDRESS) Condition(SUPPORT_ANY_SH5_32MEDIA)
+Enable the use of the indexed addressing mode for SHmedia32/SHcompact
+
+minline-ic_invalidate
+Target Report Var(TARGET_INLINE_IC_INVALIDATE)
+Inline code to invalidate instruction cache entries after setting up nested function trampolines
+
+minvalid-symbols
+Target Report Mask(INVALID_SYMBOLS) Condition(SUPPORT_ANY_SH5)
+Assume symbols might be invalid
+
+misize
+Target Report RejectNegative Mask(DUMPISIZE)
+Annotate assembler instructions with estimated addresses
+
+ml
+Target Report RejectNegative Mask(LITTLE_ENDIAN)
+Generate code in little endian mode
+
+mnomacsave
+Target Report RejectNegative Mask(NOMACSAVE)
+Mark MAC register as call-clobbered
+
+;; ??? This option is not useful, but is retained in case there are people
+;; who are still relying on it. It may be deleted in the future.
+mpadstruct
+Target Report RejectNegative Mask(PADSTRUCT)
+Make structs a multiple of 4 bytes (warning: ABI altered)
+
+mprefergot
+Target Report RejectNegative Mask(PREFERGOT)
+Emit function-calls using global offset table when generating PIC
+
+mpt-fixed
+Target Report Mask(PT_FIXED) Condition(SUPPORT_ANY_SH5)
+Assume pt* instructions won't trap
+
+mrelax
+Target Report RejectNegative Mask(RELAX)
+Shorten address references during linking
+
+mrenesas
+Target Mask(HITACHI)
+Follow Renesas (formerly Hitachi) / SuperH calling conventions
+
+msoft-atomic
+Target Undocumented Alias(matomic-model=, soft-gusa, none)
+Deprecated. Use -matomic-model= instead to select the atomic model
+
+matomic-model=
+Target Report RejectNegative Joined Var(sh_atomic_model_str)
+Specify the model for atomic operations
+
+mtas
+Target Report RejectNegative Var(TARGET_ENABLE_TAS)
+Use tas.b instruction for __atomic_test_and_set
+
+mspace
+Target RejectNegative Alias(Os)
+Deprecated. Use -Os instead
+
+multcost=
+Target RejectNegative Joined UInteger Var(sh_multcost) Init(-1)
+Cost to assume for a multiply insn
+
+musermode
+Target Report RejectNegative Var(TARGET_USERMODE)
+Don't generate privileged-mode only code; implies -mno-inline-ic_invalidate if the inline code would not work in user mode.
+
+;; We might want to enable this by default for TARGET_HARD_SH4, because
+;; zero-offset branches have zero latency. Needs some benchmarking.
+mpretend-cmove
+Target Var(TARGET_PRETEND_CMOVE)
+Pretend a branch-around-a-move is a conditional move.
+
+mfsca
+Target Var(TARGET_FSCA)
+Enable the use of the fsca instruction
+
+mfsrra
+Target Var(TARGET_FSRRA)
+Enable the use of the fsrra instruction
+
diff --git a/gcc-4.9/gcc/config/sh/sh1.md b/gcc-4.9/gcc/config/sh/sh1.md
new file mode 100644
index 000000000..08b212447
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh1.md
@@ -0,0 +1,85 @@
+;; DFA scheduling description for Renesas / SuperH SH.
+;; Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; SH-1 scheduling. This is just a conversion of the old scheduling
+;; model, which used define_function_unit.
+
+(define_automaton "sh1")
+(define_cpu_unit "sh1memory,sh1int,sh1mpy,sh1fp" "sh1")
+
+;; Loads have a latency of two.
+;; However, call insns can have a delay slot, so that we want one more
+;; insn to be scheduled between the load of the function address and the call.
+;; This is equivalent to a latency of three.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+(define_insn_reservation "sh1_load_si" 3
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "load_si,pcload_si"))
+ "sh1memory*2")
+
+(define_insn_reservation "sh1_load_store" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "load,pcload,pload,mem_mac,store,fstore,pstore,mac_mem"))
+ "sh1memory*2")
+
+(define_insn_reservation "sh1_arith3" 3
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "arith3,arith3b"))
+ "sh1int*3")
+
+(define_insn_reservation "sh1_dyn_shift" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "dyn_shift"))
+ "sh1int*2")
+
+(define_insn_reservation "sh1_int" 1
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "!arith3,arith3b,dyn_shift"))
+ "sh1int")
+
+;; ??? These are approximations.
+(define_insn_reservation "sh1_smpy" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "smpy"))
+ "sh1mpy*2")
+
+(define_insn_reservation "sh1_dmpy" 3
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "dmpy"))
+ "sh1mpy*3")
+
+(define_insn_reservation "sh1_fp" 2
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "fp,fpscr_toggle,fp_cmp,fmove"))
+ "sh1fp")
+
+(define_insn_reservation "sh1_fdiv" 13
+ (and (eq_attr "pipe_model" "sh1")
+ (eq_attr "type" "fdiv"))
+ "sh1fp*12")
+
diff --git a/gcc-4.9/gcc/config/sh/sh4-300.md b/gcc-4.9/gcc/config/sh/sh4-300.md
new file mode 100644
index 000000000..c0c0a5c55
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh4-300.md
@@ -0,0 +1,281 @@
+;; DFA scheduling description for ST40-300.
+;; Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; The following description models the ST40-300 pipeline using the DFA based
+;; scheduler.
+
+;; Two automata are defined to reduce the number of states
+;; which a single large automaton will have. (Factoring)
+
+(define_automaton "sh4_300_inst_pipeline,sh4_300_fpu_pipe")
+
+;; This unit is basically the decode unit of the processor.
+;; Since SH4 is a dual issue machine, it is as if there are two
+;; units, so that any insn can be processed by either one
+;; of the decoding units.
+(define_cpu_unit "sh4_300_pipe_01,sh4_300_pipe_02" "sh4_300_inst_pipeline")
+
+;; The floating point units.
+(define_cpu_unit "sh4_300_fpt,sh4_300_fpu,sh4_300_fds" "sh4_300_fpu_pipe")
+
+;; integer multiplier unit
+(define_cpu_unit "sh4_300_mul" "sh4_300_inst_pipeline")
+
+;; LS unit
+(define_cpu_unit "sh4_300_ls" "sh4_300_inst_pipeline")
+
+;; The address calculator used for branch instructions.
+;; This will be reserved after "issue" of branch instructions
+;; and this is to make sure that no two branch instructions
+;; can be issued in parallel.
+(define_cpu_unit "sh4_300_br" "sh4_300_inst_pipeline")
+
+;; ----------------------------------------------------
+;; This reservation is to simplify the dual issue description.
+
+(define_reservation "sh4_300_issue" "sh4_300_pipe_01|sh4_300_pipe_02")
+
+(define_reservation "all" "sh4_300_pipe_01+sh4_300_pipe_02")
+
+;;(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
+
+;; MOV RM,RN / MOV #imm8,RN / STS PR,RN
+(define_insn_reservation "sh4_300_mov" 0
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "move,movi8,prget"))
+ "sh4_300_issue")
+
+;; Fixed STS from MACL / MACH
+(define_insn_reservation "sh4_300_mac_gp" 0
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mac_gp"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; Fixed LDS to MACL / MACH
+(define_insn_reservation "sh4_300_gp_mac" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_mac"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; Instructions without specific resource requirements with latency 1.
+(define_insn_reservation "sh4_300_simple_arith" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mt_group,arith,dyn_shift,prset"))
+ "sh4_300_issue")
+
+;; Load and store instructions have no alignment peculiarities for the ST40-300,
+;; but they use the load-store unit, which they share with the fmove type
+;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) .
+;; Loads have a latency of three.
+
+;; Load Store instructions.
+(define_insn_reservation "sh4_300_load" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "load,pcload,load_si,pcload_si,pload"))
+ "sh4_300_issue+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_mac_load" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mem_mac"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_mul")
+
+(define_insn_reservation "sh4_300_fload" 4
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fload,pcfload"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_fpt")
+
+;; sh_adjust_cost describes the reduced latency of the feeding insns of a store.
+;; The latency of an auto-increment register is 1; the latency of the memory
+;; output is not actually considered here anyway.
+(define_insn_reservation "sh4_300_store" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "store,pstore"))
+ "sh4_300_issue+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_fstore" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fstore"))
+ "sh4_300_issue+sh4_300_ls+sh4_300_fpt")
+
+;; Fixed STS.L from MACL / MACH
+(define_insn_reservation "sh4_300_mac_store" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mac_mem"))
+ "sh4_300_issue+sh4_300_mul+sh4_300_ls")
+
+(define_insn_reservation "sh4_300_gp_fpul" 2
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_fpul"))
+ "sh4_300_issue+sh4_300_fpt")
+
+(define_insn_reservation "sh4_300_fpul_gp" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fpul_gp"))
+ "sh4_300_issue+sh4_300_fpt")
+
+;; Branch (BF,BF/S,BT,BT/S,BRA)
+;; Branch Far (JMP,RTS,BRAF)
+;; Group: BR
+;; When displacement is 0 for BF / BT, we have effectively conditional
+;; execution of one instruction, without pipeline disruption.
+;; Otherwise, the latency depends on prediction success.
+;; We can't really do much with the latency, even if we could express it,
+;; but the pairing restrictions are useful to take into account.
+;; ??? If the branch is likely, and not paired with a preceding insn,
+;; or likely and likely not predicted, we might want to fill the delay slot.
+;; However, there appears to be no machinery to make the compiler
+;; recognize these scenarios.
+(define_insn_reservation "sh4_300_branch" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "cbranch,jump,return,jump_ind"))
+ "sh4_300_issue+sh4_300_br")
+
+;; RTE
+(define_insn_reservation "sh4_300_return_from_exp" 9
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "rte"))
+ "sh4_300_pipe_01+sh4_300_pipe_02*9")
+
+;; OCBP, OCBWB
+;; Group: CO
+;; Latency: 1-5
+;; Issue Rate: 1
+;; cwb is used for the sequence
+;; ocbwb @%0
+;; extu.w %0,%2
+;; or %1,%2
+;; mov.l %0,@%2
+;; This description is likely inexact, but this pattern should not actually
+;; appear when compiling for sh4-300; we should use isbi instead.
+;; If a -mtune option is added later, we should use the icache array
+;; dispatch method instead.
+(define_insn_reservation "sh4_300_ocbwb" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "cwb"))
+ "all*3")
+
+;; JSR,BSR,BSRF
+;; Calls have a mandatory delay slot, which we'd like to fill with an insn
+;; that can be paired with the call itself.
+;; Scheduling runs before reorg, so we approximate this by saying that we
+;; want the call to be paired with a preceding insn.
+;; In most cases, the insn that loads the address of the call should have
+;; a nonzero latency (mov rn,rm doesn't make sense since we could use rn
+;; for the address then). Thus, a preceding insn that can be paired with
+;; a call should be eligible for the delay slot.
+;;
+;; Calls introduce a longish delay that is likely to flush the pipelines
+;; of the caller's instructions. Ordinary functions tend to end with a
+;; load to restore a register (in the delay slot of rts), while sfuncs
+;; tend to end with an EX or MT insn. But that is not actually relevant,
+;; since there are no instructions that contend for memory access early.
+;; We could, of course, provide exact scheduling information for specific
+;; sfuncs, if that should prove useful.
+(define_insn_reservation "sh4_300_call" 16
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "call,sfunc"))
+ "sh4_300_issue+sh4_300_br,all*15")
+
+;; FMOV.S / FMOV.D
+(define_insn_reservation "sh4_300_fmov" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fmove"))
+ "sh4_300_issue+sh4_300_fpt")
+
+;; LDS to FPSCR
+(define_insn_reservation "sh4_300_fpscr_load" 8
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "gp_fpscr"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt")
+
+;; LDS.L to FPSCR
+(define_insn_reservation "sh4_300_fpscr_load_mem" 8
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "mem_fpscr"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt+sh4_300_ls")
+
+
+;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
+(define_insn_reservation "multi" 2
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "smpy,dmpy"))
+ "sh4_300_issue+sh4_300_mul")
+
+;; FPCHG, FRCHG, FSCHG
+(define_insn_reservation "fpscr_toggle" 1
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fpscr_toggle"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fpt")
+
+;; FCMP/EQ, FCMP/GT
+(define_insn_reservation "fp_cmp" 3
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fp_cmp,dfp_cmp"))
+ "sh4_300_issue+sh4_300_fpu")
+
+;; Single precision floating point (FADD,FLOAT,FMAC,FMUL,FSUB,FTRC)
+;; Double-precision floating-point (FADD,FCNVDS,FCNVSD,FLOAT,FSUB,FTRC)
+(define_insn_reservation "fp_arith" 6
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fp,ftrc_s,dfp_arith,dfp_conv"))
+ "sh4_300_issue+sh4_300_fpu")
+
+;; Single Precision FDIV/SQRT
+(define_insn_reservation "fp_div" 19
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "fdiv"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*15")
+
+;; Double-precision floating-point FMUL
+(define_insn_reservation "dfp_mul" 9
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "dfp_mul"))
+ "sh4_300_issue+sh4_300_fpu,sh4_300_fpu*3")
+
+;; Double precision FDIV/SQRT
+(define_insn_reservation "dp_div" 35
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "dfdiv"))
+ "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*31")
+
+;; ??? We don't really want these for sh4-300.
+;; this pattern itself is likely to finish in 3 cycles, but also
+;; to disrupt branch prediction for taken branches for the following
+;; condbranch.
+(define_insn_reservation "sh4_300_arith3" 5
+ (and (eq_attr "pipe_model" "sh4_300")
+ (eq_attr "type" "arith3"))
+ "sh4_300_issue,all*4")
+
+;; arith3b insns without branch redirection make use of the 0-offset 0-latency
+;; branch feature, and thus schedule the same no matter if the branch is taken
+;; or not. If the branch is redirected, the taken branch might take longer,
+;; but then, we don't have to take the next branch.
+;; ??? should we suppress branch redirection for sh4-300 to improve branch
+;; target hit rates?
+(define_insn_reservation "arith3b" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3"))
+ "issue,all")
diff --git a/gcc-4.9/gcc/config/sh/sh4.md b/gcc-4.9/gcc/config/sh/sh4.md
new file mode 100644
index 000000000..0ff6a0b57
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh4.md
@@ -0,0 +1,454 @@
+;; DFA scheduling description for SH4.
+;; Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Load and store instructions save a cycle if they are aligned on a
+;; four byte boundary. Using a function unit for stores encourages
+;; gcc to separate load and store instructions by one instruction,
+;; which makes it more likely that the linker will be able to word
+;; align them when relaxing.
+
+;; The following description models the SH4 pipeline using the DFA based
+;; scheduler. The DFA based description is a better way to model a
+;; superscalar pipeline than the function unit reservation model.
+;; 1. The function unit based model is oriented to describe at most one
+;; unit reservation by each insn. It is difficult to model unit reservations
+;; in multiple pipeline units by the same insn. This can be done using a DFA
+;; based description.
+;; 2. The execution performance of the DFA based scheduler does not depend on
+;; processor complexity.
+;; 3. Writing all unit reservations for an instruction class is a more natural
+;; description of the pipeline and makes the interface to the hazard
+;; recognizer simpler than the old function unit based model.
+;; 4. The DFA model is richer and is a part of a greater overall framework
+;; of RCSP.
+
+
+;; Two automata are defined to reduce the number of states
+;; which a single large automaton will have. (Factoring)
+(define_automaton "inst_pipeline,fpu_pipe")
+
+;; This unit is basically the decode unit of the processor.
+;; Since SH4 is a dual issue machine, it is as if there are two
+;; units, so that any insn can be processed by either one
+;; of the decoding units.
+(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")
+
+
+;; The fixed point arithmetic calculator (?? EX unit).
+(define_cpu_unit "int" "inst_pipeline")
+
+;; f1_1 and f1_2 are floating point units. Actually there is
+;; an f1 unit which can overlap with the other f1 unit but
+;; not with an F1 unit. It is as though there were two
+;; f1 units.
+(define_cpu_unit "f1_1,f1_2" "fpu_pipe")
+
+;; The floating point units (except FS - F2 always precedes it.)
+(define_cpu_unit "F0,F1,F2,F3" "fpu_pipe")
+
+;; This is basically the MA unit of SH4
+;; used in the LOAD/STORE pipeline.
+(define_cpu_unit "memory" "inst_pipeline")
+
+;; However, there are LS group insns that don't use it, even ones that
+;; complete in 0 cycles. So we use an extra unit for the issue of LS insns.
+(define_cpu_unit "load_store" "inst_pipeline")
+
+;; The address calculator used for branch instructions.
+;; This will be reserved after "issue" of branch instructions
+;; and this is to make sure that no two branch instructions
+;; can be issued in parallel.
+
+(define_cpu_unit "pcr_addrcalc" "inst_pipeline")
+
+;; ----------------------------------------------------
+;; This reservation is to simplify the dual issue description.
+(define_reservation "issue" "pipe_01|pipe_02")
+
+;; This is to express the locking of D stage.
+;; Note that the issue of a CO group insn also effectively locks the D stage.
+(define_reservation "d_lock" "pipe_01+pipe_02")
+
+;; Every FE instruction but fipr / ftrv starts with issue and this.
+(define_reservation "F01" "F0+F1")
+
+;; This is to simplify description where F1,F2,FS
+;; are used simultaneously.
+(define_reservation "fpu" "F1+F2")
+
+;; This is to highlight the fact that f1
+;; cannot overlap with F1.
+(exclusion_set "f1_1,f1_2" "F1")
+
+(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing")
+
+;; Although reg moves have a latency of zero,
+;; we need to highlight that they use the D stage
+;; for one cycle.
+
+;; Group: MT
+(define_insn_reservation "reg_mov" 0
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "move"))
+ "issue")
+
+;; Group: LS
+(define_insn_reservation "freg_mov" 0
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fmove"))
+ "issue+load_store")
+
+;; We don't model all pipeline stages; we model the issue ('D') stage
+;; inasmuch as we allow only two instructions to issue simultaneously,
+;; and CO instructions prevent any simultaneous issue of another instruction.
+;; (This uses pipe_01 and pipe_02).
+;; Double issue of EX insns is prevented by using the int unit in the EX stage.
+;; Double issue of EX / BR insns is prevented by using the int unit /
+;; pcr_addrcalc unit in the EX stage.
+;; Double issue of BR / LS instructions is prevented by using the
+;; pcr_addrcalc / load_store unit in the issue cycle.
+;; Double issue of FE instructions is prevented by using F0 in the first
+;; pipeline stage after the first D stage.
+;; There is no need to describe the [ES]X / [MN]A / S stages after a D stage
+;; (except in the cases outlined above), nor to describe the FS stage after
+;; the F2 stage.
+
+;; Other MT group instructions(1 step operations)
+;; Group: MT
+;; Latency: 1
+;; Issue Rate: 1
+(define_insn_reservation "mt" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mt_group"))
+ "issue")
+
+;; Fixed Point Arithmetic Instructions(1 step operations)
+;; Group: EX
+;; Latency: 1
+;; Issue Rate: 1
+(define_insn_reservation "sh4_simple_arith" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "insn_class" "ex_group"))
+ "issue,int")
+
+;; Load and store instructions have no alignment peculiarities for the SH4,
+;; but they use the load-store unit, which they share with the fmove type
+;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) .
+;; Loads have a latency of two.
+;; However, call insns can only be paired with a preceding insn, and have
+;; a delay slot, so that we want two more insns to be scheduled between the
+;; load of the function address and the call. This is equivalent to a
+;; latency of three.
+;; ADJUST_COST can only properly handle reductions of the cost, so we
+;; use a latency of three here, which gets multiplied by 10 to yield 30.
+;; We only do this for SImode loads of general registers, to make the work
+;; for ADJUST_COST easier.
+
+;; Load Store instructions. (MOV.[BWL] @(d,GBR))
+;; Group: LS
+;; Latency: 2
+;; Issue Rate: 1
+(define_insn_reservation "sh4_load" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "load,pcload"))
+ "issue+load_store,nothing,memory")
+
+;; calls / sfuncs need an extra instruction for their delay slot.
+;; Moreover, estimating the latency for SImode loads as 3 will also allow
+;; adjust_cost to meaningfully bump it back up to 3 if they load the shift
+;; count of a dynamic shift.
+(define_insn_reservation "sh4_load_si" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "load_si,pcload_si"))
+ "issue+load_store,nothing,memory")
+
+;; (define_bypass 2 "sh4_load_si" "!sh4_call")
+
+;; The load latency is upped to three if the dependent insn does
+;; double precision computation. We want the 'default' latency to reflect
+;; that increased latency because otherwise the insn priorities won't
+;; allow proper scheduling.
+(define_insn_reservation "sh4_fload" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fload,pcfload"))
+ "issue+load_store,nothing,memory")
+
+;; (define_bypass 2 "sh4_fload" "!")
+
+(define_insn_reservation "sh4_store" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "store,fstore"))
+ "issue+load_store,nothing,memory")
+
+(define_insn_reservation "mac_mem" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mac_mem"))
+ "d_lock,nothing,memory")
+
+;; Load Store instructions.
+;; Group: LS
+;; Latency: 1
+;; Issue Rate: 1
+(define_insn_reservation "sh4_gp_fpul" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "gp_fpul"))
+ "issue+load_store")
+
+;; Load Store instructions.
+;; Group: LS
+;; Latency: 3
+;; Issue Rate: 1
+(define_insn_reservation "sh4_fpul_gp" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fpul_gp"))
+ "issue+load_store")
+
+;; Branch (BF,BF/S,BT,BT/S,BRA)
+;; Group: BR
+;; Latency when taken: 2 (or 1)
+;; Issue Rate: 1
+;; The latency is 1 when displacement is 0.
+;; We can't really do much with the latency, even if we could express it,
+;; but the pairing restrictions are useful to take into account.
+;; ??? If the branch is likely, we might want to fill the delay slot;
+;; if the branch is likely, but not very likely, should we pretend to use
+;; a resource that CO instructions use, to get a pairable delay slot insn?
+(define_insn_reservation "sh4_branch" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "cbranch,jump"))
+ "issue+pcr_addrcalc")
+
+;; Branch Far (JMP,RTS,BRAF)
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; ??? Scheduling happens before branch shortening, and hence jmp and braf
+;; can't be distinguished from bra for the "jump" pattern.
+(define_insn_reservation "sh4_return" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "return,jump_ind"))
+ "d_lock*2")
+
+;; RTE
+;; Group: CO
+;; Latency: 5
+;; Issue Rate: 5
+;; This instruction can be executed in any of the pipelines
+;; and blocks the pipeline for the next 4 stages.
+(define_insn_reservation "sh4_return_from_exp" 5
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "rte"))
+ "d_lock*5")
+
+;; OCBP, OCBWB
+;; Group: CO
+;; Latency: 1-5
+;; Issue Rate: 1
+;; cwb is used for the sequence
+;; ocbwb @%0
+;; extu.w %0,%2
+;; or %1,%2
+;; mov.l %0,@%2
+;; ocbwb on its own would be "d_lock,nothing,memory*5"
+(define_insn_reservation "ocbwb" 6
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "cwb"))
+ "d_lock*2,(d_lock+memory)*3,issue+load_store+memory,memory*2")
+
+;; LDS to PR,JSR
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; The SX stage is blocked for the last 2 cycles.
+;; OTOH, the only time that has an effect for insns generated by the compiler
+;; is when lds to PR is followed by sts from PR - and that is highly unlikely -
+;; or when we are doing a function call - and we don't do inter-function
+;; scheduling. For the function call case, it's really best that we end with
+;; something that models an rts.
+(define_insn_reservation "sh4_lds_to_pr" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "prset") )
+ "d_lock*2")
+
+;; Calls introduce a longish delay that is likely to flush the pipelines
+;; of the caller's instructions. Ordinary functions tend to end with a
+;; load to restore a register (in the delay slot of rts), while sfuncs
+;; tend to end with an EX or MT insn. But that is not actually relevant,
+;; since there are no instructions that contend for memory access early.
+;; We could, of course, provide exact scheduling information for specific
+;; sfuncs, if that should prove useful.
+(define_insn_reservation "sh4_call" 16
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "call,sfunc"))
+ "d_lock*16")
+
+;; LDS.L to PR
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 2
+;; The SX unit is blocked for the last 2 cycles.
+(define_insn_reservation "ldsmem_to_pr" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "pload"))
+ "d_lock*2")
+
+;; STS from PR
+;; Group: CO
+;; Latency: 2
+;; Issue Rate: 2
+;; The SX unit is busy in the second and third cycles.
+(define_insn_reservation "sts_from_pr" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "prget"))
+ "d_lock*2")
+
+;; STS.L from PR
+;; Group: CO
+;; Latency: 2
+;; Issue Rate: 2
+(define_insn_reservation "sh4_prstore_mem" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "pstore"))
+ "d_lock*2,nothing,memory")
+
+;; LDS to FPSCR
+;; Group: CO
+;; Latency: 4
+;; Issue Rate: 1
+;; F1 is blocked for the last three cycles.
+(define_insn_reservation "fpscr_load" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "gp_fpscr"))
+ "d_lock,nothing,F1*3")
+
+;; LDS.L to FPSCR
+;; Group: CO
+;; Latency: 1 / 4
+;; Latency to update Rn is 1 and latency to update FPSCR is 4
+;; Issue Rate: 1
+;; F1 is blocked for the last three cycles.
+(define_insn_reservation "fpscr_load_mem" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mem_fpscr"))
+ "d_lock,nothing,(F1+memory),F1*2")
+
+
+;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
+;; Group: CO
+;; Latency: 4 / 4
+;; Issue Rate: 2
+(define_insn_reservation "multi" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "smpy,dmpy"))
+ "d_lock,(d_lock+f1_1),(f1_1|f1_2)*3,F2")
+
+;; Fixed STS from, and LDS to MACL / MACH
+;; Group: CO
+;; Latency: 3
+;; Issue Rate: 1
+(define_insn_reservation "sh4_mac_gp" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "mac_gp,gp_mac,mem_mac"))
+ "d_lock")
+
+
+;; Single precision floating point computation FCMP/EQ,
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
+;; Group: FE
+;; Latency: 3/4
+;; Issue Rate: 1
+(define_insn_reservation "fp_arith" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fp,fp_cmp"))
+ "issue,F01,F2")
+
+;; We don't model the resource usage of this exactly because that would
+;; introduce a bogus latency.
+(define_insn_reservation "sh4_fpscr_toggle" 1
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fpscr_toggle"))
+ "issue")
+
+(define_insn_reservation "fp_arith_ftrc" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "ftrc_s"))
+ "issue,F01,F2")
+
+(define_bypass 1 "fp_arith_ftrc" "sh4_fpul_gp")
+
+;; Single Precision FDIV/SQRT
+;; Group: FE
+;; Latency: 12/13 (FDIV); 11/12 (FSQRT)
+;; Issue Rate: 1
+;; We describe fdiv here; fsqrt is actually one cycle faster.
+(define_insn_reservation "fp_div" 12
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "fdiv"))
+ "issue,F01+F3,F2+F3,F3*7,F1+F3,F2")
+
+;; Double Precision floating point computation
+;; (FCNVDS, FCNVSD, FLOAT, FTRC)
+;; Group: FE
+;; Latency: (3,4)/5
+;; Issue Rate: 1
+(define_insn_reservation "dp_float" 4
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfp_conv"))
+ "issue,F01,F1+F2,F2")
+
+;; Double-precision floating-point (FADD,FMUL,FSUB)
+;; Group: FE
+;; Latency: (7,8)/9
+;; Issue Rate: 1
+(define_insn_reservation "fp_double_arith" 8
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfp_arith,dfp_mul"))
+ "issue,F01,F1+F2,fpu*4,F2")
+
+;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
+;; Group: CO
+;; Latency: 3/5
+;; Issue Rate: 2
+(define_insn_reservation "fp_double_cmp" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfp_cmp"))
+ "d_lock,(d_lock+F01),F1+F2,F2")
+
+;; Double precision FDIV/SQRT
+;; Group: FE
+;; Latency: (24,25)/26
+;; Issue Rate: 1
+(define_insn_reservation "dp_div" 25
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "dfdiv"))
+ "issue,F01+F3,F1+F2+F3,F2+F3,F3*16,F1+F3,(fpu+F3)*2,F2")
+
+
+;; Use the branch-not-taken case to model arith3 insns. For the branch taken
+;; case, we'd get a d_lock instead of issue at the end.
+(define_insn_reservation "arith3" 3
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3"))
+ "issue,d_lock+pcr_addrcalc,issue")
+
+;; arith3b insns schedule the same no matter if the branch is taken or not.
+(define_insn_reservation "arith3b" 2
+ (and (eq_attr "pipe_model" "sh4")
+ (eq_attr "type" "arith3b"))
+ "issue,d_lock+pcr_addrcalc")
diff --git a/gcc-4.9/gcc/config/sh/sh4a.md b/gcc-4.9/gcc/config/sh/sh4a.md
new file mode 100644
index 000000000..694185181
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh4a.md
@@ -0,0 +1,234 @@
+;; Scheduling description for Renesas SH4a
+;; Copyright (C) 2003-2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GNU CC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GNU CC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; The following description models the SH4A pipeline
+;; using the DFA based scheduler.
+(define_automaton "sh4a")
+
+(define_cpu_unit "sh4a_ex" "sh4a")
+(define_cpu_unit "sh4a_ls" "sh4a")
+(define_cpu_unit "sh4a_fex" "sh4a")
+(define_cpu_unit "sh4a_fls" "sh4a")
+(define_cpu_unit "sh4a_mult" "sh4a")
+(define_cpu_unit "sh4a_fdiv" "sh4a")
+
+;; Decoding is done on the integer pipeline like the
+;; sh4. Define issue to be the | of the two pipelines
+;; to control how often instructions are issued.
+(define_reservation "ID_or" "sh4a_ex|sh4a_ls")
+(define_reservation "ID_and" "sh4a_ex+sh4a_ls")
+
+;; =======================================================
+;; Locking Descriptions
+
+;; Memory access on the LS pipeline.
+(define_cpu_unit "sh4a_memory" "sh4a")
+
+;; Other access on the LS pipeline.
+(define_cpu_unit "sh4a_load_store" "sh4a")
+
+;; The address calculator used for branch instructions.
+;; It is reserved after the "issue" of a branch instruction
+;; to make sure that no two branch instructions can be
+;; issued in parallel.
+(define_reservation "sh4a_addrcalc" "sh4a_ex")
+
+;; =======================================================
+;; Reservations
+
+;; Branch (BF,BF/S,BT,BT/S,BRA,BSR)
+;; Group: BR
+;; Latency when taken: 2
+(define_insn_reservation "sh4a_branch" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "cbranch,jump"))
+ "ID_or+sh4a_addrcalc")
+
+;; Jump (JSR,JMP,RTS)
+;; Group: BR
+;; Latency: 3
+(define_insn_reservation "sh4a_jump" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "return,jump_ind"))
+ "ID_or+sh4a_addrcalc")
+
+;; RTE
+;; Group: CO
+;; Latency: 3
+(define_insn_reservation "sh4a_rte" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "rte"))
+ "ID_and*4")
+
+;; EX Group Single
+;; Group: EX
+;; Latency: 0
+(define_insn_reservation "sh4a_ex" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "insn_class" "ex_group"))
+ "sh4a_ex")
+
+;; MOVA
+;; Group: LS
+;; Latency: 1
+(define_insn_reservation "sh4a_mova" 1
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mova"))
+ "sh4a_ls+sh4a_load_store")
+
+;; MOV
+;; Group: MT
+;; Latency: 0
+;; ??? not sure if movi8 belongs here, but that's where it was
+;; effectively before.
+(define_insn_reservation "sh4a_mov" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "move,movi8,gp_mac"))
+ "ID_or")
+
+;; Load
+;; Group: LS
+;; Latency: 3
+(define_insn_reservation "sh4a_load" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "load,pcload,mem_mac"))
+ "sh4a_ls+sh4a_memory")
+
+(define_insn_reservation "sh4a_load_si" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "load_si,pcload_si"))
+ "sh4a_ls+sh4a_memory")
+
+;; Store
+;; Group: LS
+;; Latency: 0
+(define_insn_reservation "sh4a_store" 0
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "store,fstore,mac_mem"))
+ "sh4a_ls+sh4a_memory")
+
+;; CWB TYPE
+
+;; MOVUA
+;; Group: LS
+;; Latency: 3
+(define_insn_reservation "sh4a_movua" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "movua"))
+ "sh4a_ls+sh4a_memory*2")
+
+;; Fixed point multiplication (single)
+;; Group: CO
+;; Latency: 2
+(define_insn_reservation "sh4a_smult" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "smpy"))
+ "ID_or+sh4a_mult")
+
+;; Fixed point multiplication (double)
+;; Group: CO
+;; Latency: 3
+(define_insn_reservation "sh4a_dmult" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dmpy"))
+ "ID_or+sh4a_mult")
+
+(define_insn_reservation "sh4a_mac_gp" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mac_gp"))
+ "ID_and")
+
+;; Other MT group instructions(1 step operations)
+;; Group: MT
+;; Latency: 1
+(define_insn_reservation "sh4a_mt" 1
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "mt_group"))
+ "ID_or")
+
+;; Floating point reg move
+;; Group: LS
+;; Latency: 2
+(define_insn_reservation "sh4a_freg_mov" 2
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fmove"))
+ "sh4a_ls,sh4a_fls")
+
+;; Single precision floating point computation FCMP/EQ,
+;; FCMP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRCHG, FSCHG
+;; Group: FE
+;; Latency: 3
+(define_insn_reservation "sh4a_fp_arith" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fp,fp_cmp,fpscr_toggle"))
+ "ID_or,sh4a_fex")
+
+(define_insn_reservation "sh4a_fp_arith_ftrc" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "ftrc_s"))
+ "ID_or,sh4a_fex")
+
+;; Single-precision FDIV/FSQRT
+;; Group: FE
+;; Latency: 20
+(define_insn_reservation "sh4a_fdiv" 20
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fdiv"))
+ "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex")
+
+;; Double Precision floating point computation
+;; (FCNVDS, FCNVSD, FLOAT, FTRC)
+;; Group: FE
+;; Latency: 3
+(define_insn_reservation "sh4a_dp_float" 3
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfp_conv"))
+ "ID_or,sh4a_fex")
+
+;; Double-precision floating-point (FADD,FMUL,FSUB)
+;; Group: FE
+;; Latency: 5
+(define_insn_reservation "sh4a_fp_double_arith" 5
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfp_arith,dfp_mul"))
+ "ID_or,sh4a_fex*3")
+
+;; Double precision FDIV/SQRT
+;; Group: FE
+;; Latency: 36
+(define_insn_reservation "sh4a_dp_div" 36
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "dfdiv"))
+ "ID_or,sh4a_fex+sh4a_fdiv,sh4a_fex*2")
+
+;; FSRRA
+;; Group: FE
+;; Latency: 5
+(define_insn_reservation "sh4a_fsrra" 5
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fsrra"))
+ "ID_or,sh4a_fex")
+
+;; FSCA
+;; Group: FE
+;; Latency: 7
+(define_insn_reservation "sh4a_fsca" 7
+ (and (eq_attr "cpu" "sh4a")
+ (eq_attr "type" "fsca"))
+ "ID_or,sh4a_fex*3")
diff --git a/gcc-4.9/gcc/config/sh/sh64.h b/gcc-4.9/gcc/config/sh/sh64.h
new file mode 100644
index 000000000..73d91caa3
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh64.h
@@ -0,0 +1,22 @@
+/* Definitions of target machine for GNU compiler for SuperH SH 5.
+ Copyright (C) 2000-2014 Free Software Foundation, Inc.
+ Contributed by Alexandre Oliva <aoliva@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#undef SH_ELF_WCHAR_TYPE
+#define SH_ELF_WCHAR_TYPE "int"
diff --git a/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc
new file mode 100644
index 000000000..313e5b5f4
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh_optimize_sett_clrt.cc
@@ -0,0 +1,473 @@
+/* An SH specific RTL pass that tries to optimize clrt and sett insns.
+ Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "machmode.h"
+#include "basic-block.h"
+#include "df.h"
+#include "rtl.h"
+#include "insn-config.h"
+#include "tree-pass.h"
+#include "target.h"
+
+#include <vector>
+#include <algorithm>
+
+/*
+This pass tries to eliminate unnecessary sett or clrt instructions in cases
+where the ccreg value is already known to be the same as the constant value
+the insn would set it to. This is done as follows:
+
+Check every insn in every BB and see whether it is a sett or clrt.
+Once a sett or clrt insn is hit, walk insns and predecessor basic blocks
+backwards from that insn and determine all possible ccreg values from all
+basic block paths.
+Insns that set the ccreg value in some way (simple set, clobber, etc.) are
+recorded. Conditional branches where one edge leads to the sett / clrt insn
+are also recorded, since for each edge of the conditional branch the ccreg
+value is a known constant.
+After collecting all possible ccreg values at the sett / clrt insn, check that
+they are all the same. If that common value is the same as the value the
+sett / clrt insn would set the ccreg to, the insn can be eliminated.
+*/
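+
+// A minimal illustrative example (a sketch, not taken from any particular
+// test case; it assumes the label below is reachable only through the
+// branch shown):
+//
+//     tst   r1,r1     ! T = (r1 == 0)
+//     bt    .L2       ! taken only when T == 1
+//     ...
+//   .L2:
+//     sett            ! T is already 1 on every path reaching .L2
+//
+// Walking backwards from the sett, the only ccreg value recorded for the
+// branch edge is 1, which equals the value the sett would produce, so the
+// sett can be deleted.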
+
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Helper functions
+
+#define log_msg(...)\
+ do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); } while (0)
+
+#define log_insn(i)\
+ do { if (dump_file != NULL) print_rtl_single (dump_file, \
+ (const_rtx)i); } while (0)
+
+#define log_rtx(r)\
+ do { if (dump_file != NULL) print_rtl (dump_file, (const_rtx)r); } while (0)
+
+#define log_return(retval, ...)\
+ do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \
+ return retval; } while (0)
+
+#define log_return_void(...)\
+ do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \
+ return; } while (0)
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// RTL pass class
+
+class sh_optimize_sett_clrt : public rtl_opt_pass
+{
+public:
+ sh_optimize_sett_clrt (gcc::context* ctx, const char* name);
+ virtual ~sh_optimize_sett_clrt (void);
+ virtual bool gate (void);
+ virtual unsigned int execute (void);
+
+private:
+ static const pass_data default_pass_data;
+
+ struct ccreg_value
+ {
+ // The insn at which the ccreg value was determined.
+ // Might be NULL_RTX if e.g. an unknown value is recorded for an
+ // empty basic block.
+ rtx insn;
+
+ // The basic block where the insn was discovered.
+ basic_block bb;
+
+ // The value of ccreg. If NULL_RTX, the exact value is not known, but
+ // the ccreg is changed in some way (e.g. clobbered).
+ rtx value;
+ };
+
+ // Update the mode of the captured m_ccreg with the specified mode.
+ void update_ccreg_mode (machine_mode m);
+
+ // Given an insn pattern, check if it sets the ccreg to a constant value
+ // of either zero or STORE_FLAG_VALUE. If so, return the value rtx,
+ // NULL_RTX otherwise.
+ rtx const_setcc_value (rtx pat) const;
+
+ // Given a start insn and its basic block, recursively determine all
+ // possible ccreg values in all basic block paths that can lead to the
+ // start insn.
+ void find_last_ccreg_values (rtx start_insn, basic_block bb,
+ std::vector<ccreg_value>& values_out,
+ std::vector<basic_block>& prev_visited_bb) const;
+
+ // Given a cbranch insn, its basic block and another basic block, determine
+ // the value to which the ccreg will be set after jumping/falling through to
+ // the specified target basic block.
+ bool sh_cbranch_ccreg_value (rtx cbranch_insn,
+ basic_block cbranch_insn_bb,
+ basic_block branch_target_bb) const;
+
+ // Check whether all of the ccreg values are the same.
+ static bool all_ccreg_values_equal (const std::vector<ccreg_value>& values);
+
+ // Remove REG_DEAD and REG_UNUSED notes from insns of the specified
+ // ccreg_value entries.
+ void remove_ccreg_dead_unused_notes (std::vector<ccreg_value>& values) const;
+
+ // rtx of the ccreg that is obtained from the target.
+ rtx m_ccreg;
+};
+
+const pass_data sh_optimize_sett_clrt::default_pass_data =
+{
+ RTL_PASS, // type
+ "", // name (overwritten by the constructor)
+ OPTGROUP_NONE, // optinfo_flags
+ true, // has_gate
+ true, // has_execute
+ TV_OPTIMIZE, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+sh_optimize_sett_clrt::sh_optimize_sett_clrt (gcc::context* ctx,
+ const char* name)
+: rtl_opt_pass (default_pass_data, ctx),
+ m_ccreg (NULL_RTX)
+{
+ // Overwrite default name in pass_data base class.
+ this->name = name;
+}
+
+sh_optimize_sett_clrt::~sh_optimize_sett_clrt (void)
+{
+}
+
+bool
+sh_optimize_sett_clrt::gate (void)
+{
+ return optimize > 0;
+}
+
+unsigned int
+sh_optimize_sett_clrt::execute (void)
+{
+ unsigned int ccr0 = INVALID_REGNUM;
+ unsigned int ccr1 = INVALID_REGNUM;
+
+ if (targetm.fixed_condition_code_regs (&ccr0, &ccr1)
+ && ccr0 != INVALID_REGNUM)
+ {
+ // Initially create a reg rtx with VOIDmode.
+ // When the constant setcc is discovered, the mode is changed
+ // to the mode that is actually used by the target.
+ m_ccreg = gen_rtx_REG (VOIDmode, ccr0);
+ }
+
+ if (m_ccreg == NULL_RTX)
+ log_return (0, "no ccreg.\n\n");
+
+ if (STORE_FLAG_VALUE != 1)
+ log_return (0, "unsupported STORE_FLAG_VALUE %d", STORE_FLAG_VALUE);
+
+ log_msg ("ccreg: ");
+ log_rtx (m_ccreg);
+ log_msg (" STORE_FLAG_VALUE = %d\n", STORE_FLAG_VALUE);
+
+ if (!df_regs_ever_live_p (ccr0))
+ log_return (0, "ccreg never live\n\n");
+
+ // Output vector for find_last_ccreg_values.
+ std::vector<ccreg_value> ccreg_values;
+ ccreg_values.reserve (32);
+
+ // Something for recording visited basic blocks to avoid infinite recursion.
+ std::vector<basic_block> visited_bbs;
+ visited_bbs.reserve (32);
+
+ // Look for insns that set the ccreg to a constant value and see if it can
+ // be optimized.
+ basic_block bb;
+ FOR_EACH_BB_REVERSE_FN (bb, cfun)
+ for (rtx next_i, i = NEXT_INSN (BB_HEAD (bb));
+ i != NULL_RTX && i != BB_END (bb); i = next_i)
+ {
+ next_i = NEXT_INSN (i);
+
+ if (!INSN_P (i) || !NONDEBUG_INSN_P (i))
+ continue;
+
+ rtx setcc_val = const_setcc_value (PATTERN (i));
+ if (setcc_val != NULL_RTX)
+ {
+ update_ccreg_mode (GET_MODE (XEXP (PATTERN (i), 0)));
+
+ log_msg ("\n\nfound const setcc insn in [bb %d]: \n", bb->index);
+ log_insn (i);
+ log_msg ("\n");
+
+ ccreg_values.clear ();
+ visited_bbs.clear ();
+ find_last_ccreg_values (PREV_INSN (i), bb, ccreg_values,
+ visited_bbs);
+
+ log_msg ("number of ccreg values collected: %u\n",
+ (unsigned int)ccreg_values.size ());
+
+ // If all the collected values are equal and are equal to the
+ // constant value of the setcc insn, the setcc insn can be
+ // removed.
+ if (all_ccreg_values_equal (ccreg_values)
+ && rtx_equal_p (ccreg_values.front ().value, setcc_val))
+ {
+ log_msg ("all values are ");
+ log_rtx (setcc_val);
+ log_msg ("\n");
+
+ delete_insn (i);
+ remove_ccreg_dead_unused_notes (ccreg_values);
+ }
+ }
+ }
+
+ log_return (0, "\n\n");
+}
+
+void
+sh_optimize_sett_clrt::update_ccreg_mode (machine_mode m)
+{
+ if (GET_MODE (m_ccreg) == m)
+ return;
+
+ PUT_MODE (m_ccreg, m);
+ log_msg ("updated ccreg mode: ");
+ log_rtx (m_ccreg);
+ log_msg ("\n\n");
+}
+
+rtx
+sh_optimize_sett_clrt::const_setcc_value (rtx pat) const
+{
+ if (GET_CODE (pat) == SET
+ && REG_P (XEXP (pat, 0)) && REGNO (XEXP (pat, 0)) == REGNO (m_ccreg)
+ && CONST_INT_P (XEXP (pat, 1))
+ && (INTVAL (XEXP (pat, 1)) == 0
+ || INTVAL (XEXP (pat, 1)) == STORE_FLAG_VALUE))
+ return XEXP (pat, 1);
+ else
+ return NULL_RTX;
+}
+
+bool
+sh_optimize_sett_clrt
+::sh_cbranch_ccreg_value (rtx cbranch_insn, basic_block cbranch_insn_bb,
+ basic_block branch_target_bb) const
+{
+ rtx pc_set_rtx = pc_set (cbranch_insn);
+ gcc_assert (pc_set_rtx != NULL_RTX);
+ gcc_assert (branch_target_bb != NULL);
+
+ rtx cond = XEXP (XEXP (pc_set_rtx, 1), 0);
+ bool branch_if;
+
+ if (GET_CODE (cond) == NE
+ && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (m_ccreg)
+ && XEXP (cond, 1) == const0_rtx)
+ branch_if = true;
+
+ else if (GET_CODE (cond) == EQ
+ && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (m_ccreg)
+ && XEXP (cond, 1) == const0_rtx)
+ branch_if = false;
+
+ else
+ gcc_unreachable ();
+
+ if (branch_target_bb == BRANCH_EDGE (cbranch_insn_bb)->dest)
+ return branch_if;
+ else if (branch_target_bb == FALLTHRU_EDGE (cbranch_insn_bb)->dest)
+ return !branch_if;
+ else
+ gcc_unreachable ();
+}
+
+void
+sh_optimize_sett_clrt
+::find_last_ccreg_values (rtx start_insn, basic_block bb,
+ std::vector<ccreg_value>& values_out,
+ std::vector<basic_block>& prev_visited_bb) const
+{
+ // FIXME: For larger CFGs this will unnecessarily re-visit basic blocks.
+ // Once a basic block has been visited, the result should be stored in
+ // some container so that it can be looked up quickly eliminating the
+ // re-visits.
+ log_msg ("looking for ccreg values in [bb %d] ", bb->index);
+ if (!prev_visited_bb.empty ())
+ log_msg ("(prev visited [bb %d])", prev_visited_bb.back ()->index);
+ log_msg ("\n");
+
+ for (rtx i = start_insn; i != NULL_RTX && i != PREV_INSN (BB_HEAD (bb));
+ i = PREV_INSN (i))
+ {
+ if (!INSN_P (i))
+ continue;
+
+ if (reg_set_p (m_ccreg, i))
+ {
+ const_rtx set_rtx = set_of (m_ccreg, i);
+
+ ccreg_value v;
+ v.insn = i;
+ v.bb = bb;
+ v.value = set_rtx != NULL_RTX && GET_CODE (set_rtx) == SET
+ ? XEXP (set_rtx, 1)
+ : NULL_RTX;
+
+ log_msg ("found setcc in [bb %d] in insn:\n", bb->index);
+ log_insn (i);
+ log_msg ("\nccreg value: ");
+ log_rtx (v.value);
+ log_msg ("\n");
+
+ values_out.push_back (v);
+ return;
+ }
+
+ if (any_condjump_p (i) && onlyjump_p (i) && !prev_visited_bb.empty ())
+ {
+ // For a conditional branch the ccreg value will be a known constant
+ // of either 0 or STORE_FLAG_VALUE after branching/falling through
+ // to one of the two successor BBs. Record the value for the BB
+ // where we came from.
+ log_msg ("found cbranch in [bb %d]:\n", bb->index);
+ log_insn (i);
+
+ ccreg_value v;
+ v.insn = i;
+ v.bb = bb;
+ v.value = GEN_INT (sh_cbranch_ccreg_value (i, bb,
+ prev_visited_bb.back ()));
+
+ log_msg (" branches to [bb %d] with ccreg value ",
+ prev_visited_bb.back ()->index);
+ log_rtx (v.value);
+ log_msg ("\n");
+
+ values_out.push_back (v);
+ return;
+ }
+ }
+
+ // If here, we've walked up all the insns of the current basic block
+ // and none of them seems to modify the ccreg.
+ // In this case, check the predecessor basic blocks.
+ unsigned int pred_bb_count = 0;
+
+ // If the current basic block is not in the stack of previously visited
+ // basic blocks yet, we can recursively check the predecessor basic blocks.
+ // Otherwise we have a loop in the CFG and recursing again will result in
+ // an infinite loop.
+ if (std::find (prev_visited_bb.rbegin (), prev_visited_bb.rend (), bb)
+ == prev_visited_bb.rend ())
+ {
+ prev_visited_bb.push_back (bb);
+
+ for (edge_iterator ei = ei_start (bb->preds); !ei_end_p (ei);
+ ei_next (&ei))
+ {
+ basic_block pred_bb = ei_edge (ei)->src;
+ pred_bb_count += 1;
+ find_last_ccreg_values (BB_END (pred_bb), pred_bb, values_out,
+ prev_visited_bb);
+ }
+
+ prev_visited_bb.pop_back ();
+ }
+ else
+ log_msg ("loop detected for [bb %d]\n", bb->index);
+
+ log_msg ("[bb %d] pred_bb_count = %u\n", bb->index, pred_bb_count);
+
+ if (pred_bb_count == 0)
+ {
+ // If we haven't checked a single predecessor basic block, the current
+ // basic block is probably a leaf block and we don't know the ccreg value.
+ log_msg ("unknown ccreg value for [bb %d]\n", bb->index);
+
+ ccreg_value v;
+ v.insn = BB_END (bb);
+ v.bb = bb;
+ v.value = NULL_RTX;
+
+ values_out.push_back (v);
+ }
+}
+
+bool
+sh_optimize_sett_clrt
+::all_ccreg_values_equal (const std::vector<ccreg_value>& values)
+{
+ if (values.empty ())
+ return false;
+
+ rtx last_value = values.front ().value;
+
+ // If the ccreg is modified in the insn but the exact value is not known
+ // the value rtx might be null.
+ if (last_value == NULL_RTX)
+ return false;
+
+ for (std::vector<ccreg_value>::const_iterator i = values.begin ();
+ i != values.end (); ++i)
+ if (i->value == NULL_RTX || !rtx_equal_p (last_value, i->value))
+ return false;
+
+ return true;
+}
+
+void
+sh_optimize_sett_clrt
+::remove_ccreg_dead_unused_notes (std::vector<ccreg_value>& values) const
+{
+ for (std::vector<ccreg_value>::iterator i = values.begin ();
+ i != values.end (); ++i)
+ {
+ if (i->insn == NULL_RTX)
+ continue;
+
+ rtx n = find_regno_note (i->insn, REG_DEAD, REGNO (m_ccreg));
+ if (n != NULL_RTX)
+ remove_note (i->insn, n);
+
+ n = find_regno_note (i->insn, REG_UNUSED, REGNO (m_ccreg));
+ if (n != NULL_RTX)
+ remove_note (i->insn, n);
+ }
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// This allows instantiating the pass somewhere else without having to pull
+// in a header file.
+opt_pass*
+make_pass_sh_optimize_sett_clrt (gcc::context* ctx, const char* name)
+{
+ return new sh_optimize_sett_clrt (ctx, name);
+}
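+
+// A minimal usage sketch (an assumption for illustration, not part of this
+// file): the backend would typically instantiate and register the pass from
+// its option-override code, roughly as below, where g is the global
+// gcc::context pointer and the reference pass name, instance number and
+// position are placeholders:
+//
+//   opt_pass* p = make_pass_sh_optimize_sett_clrt (g, "sett_clrt");
+//   struct register_pass_info inf = { p, "mode_sw", 1, PASS_POS_INSERT_AFTER };
+//   register_pass (&inf);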
diff --git a/gcc-4.9/gcc/config/sh/sh_treg_combine.cc b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc
new file mode 100644
index 000000000..e73604022
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sh_treg_combine.cc
@@ -0,0 +1,1509 @@
+/* An SH specific RTL pass that tries to combine comparisons and redundant
+ condition code register stores across multiple basic blocks.
+ Copyright (C) 2013-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "machmode.h"
+#include "basic-block.h"
+#include "df.h"
+#include "rtl.h"
+#include "insn-config.h"
+#include "insn-codes.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "tree-pass.h"
+#include "target.h"
+#include "expr.h"
+
+#include <algorithm>
+#include <list>
+#include <vector>
+
+/*
+This pass tries to optimize for example this:
+ mov.l @(4,r4),r1
+ tst r1,r1
+ movt r1
+ tst r1,r1
+ bt/s .L5
+
+into something simpler:
+ mov.l @(4,r4),r1
+ tst r1,r1
+ bf/s .L5
+
+Such sequences can be identified by looking for conditional branches and
+checking whether the ccreg is set before the conditional branch
+by testing another register for != 0, where that register was set by a
+ccreg store. This can be optimized by eliminating the redundant comparison
+and inverting the branch condition. There can be multiple comparisons in
+different basic blocks that all end up in the redundant test insn before the
+conditional branch. Some example RTL ...
+
+Example 1)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 0)))
+(set (reg:SI 167) (xor:SI (reg:SI 147 t) (const_int 1)))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (eq:SI (reg:SI 177) (const_int 0)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 50) (pc)))
+
+In [bb 4] elimination of the comparison would require inversion of the branch
+condition and compensation of other BBs.
+Instead an inverting reg-move can be used:
+
+[bb 3]
+(set (reg:SI 167) (reg:SI 173))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 167) (not:SI (reg:SI 177)))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 50) (pc)))
+
+
+Example 2)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (gt:SI (reg:SI 177) (reg:SI 179)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 51) (pc)))
+
+The common comparison is factored out and the branch condition is inverted:
+
+[bb 3]
+(set (reg:SI 167) (reg:SI 173))
+(set (reg:SI 200) (reg:SI 175))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 167) (reg:SI 177))
+(set (reg:SI 200) (reg:SI 179))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (gt:SI (reg:SI 167) (reg:SI 200)))
+(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 51) (pc)))
+
+
+Example 3)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (ge:SI (reg:SI 179) (reg:SI 177)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 51) (pc)))
+
+The T bit lifetime is extended and the branch condition is inverted:
+
+[bb 3]
+(set (reg:SI 147 t) (gt:SI (reg:SI 173) (reg:SI 175)))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (ge:SI (reg:SI 179) (reg:SI 177)))
+-> bb 5
+
+[bb 5]
+(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 51) (pc)))
+
+
+Example 4)
+----------
+
+[bb 3]
+(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 5)))
+(set (reg:SI 167) (reg:SI 147 t))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (eq:SI (reg:SI 176) (const_int 5)))
+(set (reg:SI 167) (xor:SI (reg:SI 147 t) (const_int 1)))
+-> bb 5
+
+[bb 5]
+(set (reg:SI 147 t) (eq:SI (reg:SI 167) (const_int 0)))
+(set (pc) (if_then_else (ne (reg:SI 147 t) (const_int 0))
+ (label_ref:SI 50) (pc)))
+
+In this case the comparisons are the same and could be combined, but the
+branch condition is different for [bb 3] and [bb 5]. Since the comparison
+is not a zero comparison, we can't negate one of the operands. The best thing
+we can do here is to eliminate the comparison before the cbranch and invert
+the ccreg in one of the BBs. On SH2A this will utilize the 'nott' instruction.
+
+[bb 3]
+(set (reg:SI 147 t) (eq:SI (reg:SI 173) (const_int 5)))
+-> bb 5
+
+[bb 4]
+(set (reg:SI 147 t) (eq:SI (reg:SI 176) (const_int 5)))
+(set (reg:SI 147 t) (xor:SI (reg:SI 147 t) (const_int 1)))
+-> bb 5
+
+[bb 5]
+(set (pc) (if_then_else (eq (reg:SI 147 t) (const_int 0)) // inverted
+ (label_ref:SI 50) (pc)))
+
+
+In order to handle cases such as above the RTL pass does the following:
+
+- Find the ccreg sets (comparisons) and ccreg stores
+ (inverting and non-inverting) in all related BBs.
+
+- If the comparison types in the BBs are all the same, try to combine the
+ comparisons in the BBs and replace the zero comparison before the cbranch
+ with the common comparison.
+
+ - If the cstores are the same, move the comparison before the cbranch
+ and replace the comparisons in the BBs with reg-reg copies to get the
+ operands in place (create new pseudo regs).
+
+ - If the cstores differ, try to apply the special case
+ (eq (reg) (const_int 0)) -> inverted = (not (reg)).
+ for the subordinate cstore types and eliminate the dominating ones.
+
+- If the comparison types in the BBs are not the same, or the first approach
+ doesn't work out for some reason, try to eliminate the comparison before the
+ cbranch by extending the lifetime of the ccreg by leaving the individual
+ comparisons but eliminating the cstores.
+ If the cstores are all the same this is straightforward.
+ If they're not, try to reverse the ccreg for the subordinate cstore type
+ and eliminate the dominating one.
+*/
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// Helper functions
+
+#define log_msg(...)\
+ do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); } while (0)
+
+#define log_insn(i)\
+ do { if (dump_file != NULL) print_rtl_single (dump_file, \
+ (const_rtx)i); } while (0)
+
+#define log_rtx(r)\
+ do { if (dump_file != NULL) print_rtl (dump_file, (const_rtx)r); } while (0)
+
+#define log_return(retval, ...)\
+ do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \
+ return retval; } while (0)
+
+#define log_return_void(...)\
+ do { if (dump_file != NULL) fprintf (dump_file, __VA_ARGS__); \
+ return; } while (0)
+
+struct set_of_reg
+{
+ // The insn where the search stopped or NULL_RTX.
+ rtx insn;
+
+ // The set rtx of the specified reg if found, NULL_RTX otherwise.
+ // Notice that the set rtx can also be in a parallel.
+ const_rtx set_rtx;
+
+ // The set source operand rtx if found, NULL_RTX otherwise.
+ rtx
+ set_src (void) const
+ {
+ return set_rtx == NULL_RTX ? NULL_RTX : XEXP (set_rtx, 1);
+ }
+
+ // The set destination operand rtx if found, NULL_RTX otherwise.
+ rtx
+ set_dst (void) const
+ {
+ return set_rtx == NULL_RTX ? NULL_RTX : XEXP (set_rtx, 0);
+ }
+
+ bool
+ empty (void) const
+ {
+ return insn == NULL_RTX || set_rtx == NULL_RTX;
+ }
+};
+
+// Given a reg rtx and a start insn find the insn (in the same basic block)
+// that sets the reg.
+static set_of_reg
+find_set_of_reg_bb (rtx reg, rtx insn)
+{
+ set_of_reg result = { insn, NULL_RTX };
+
+ if (!REG_P (reg) || insn == NULL_RTX)
+ return result;
+
+ for (result.insn = insn; result.insn != NULL_RTX;
+ result.insn = prev_nonnote_insn_bb (result.insn))
+ {
+ if (BARRIER_P (result.insn))
+ return result;
+ if (!NONJUMP_INSN_P (result.insn))
+ continue;
+ if (reg_set_p (reg, result.insn))
+ {
+ result.set_rtx = set_of (reg, result.insn);
+ if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
+ result.set_rtx = NULL_RTX;
+ return result;
+ }
+ }
+
+ return result;
+}
+
+static bool
+reg_dead_after_insn (const_rtx reg, const_rtx insn)
+{
+ return find_regno_note (insn, REG_DEAD, REGNO (reg)) != NULL_RTX;
+}
+
+static bool
+reg_unused_after_insn (const_rtx reg, const_rtx insn)
+{
+ return find_regno_note (insn, REG_UNUSED, REGNO (reg)) != NULL_RTX;
+}
+
+// Check whether the two specified basic blocks are adjacent, i.e. there's no
+// other basic block in between them.
+static bool
+is_adjacent_bb (basic_block a, basic_block b)
+{
+ basic_block bb0[] = { a, b };
+ basic_block bb1[] = { b, a };
+
+ for (int i = 0; i < 2; ++i)
+ for (edge_iterator ei = ei_start (bb0[i]->succs);
+ !ei_end_p (ei); ei_next (&ei))
+ if (ei_edge (ei)->dest == bb1[i])
+ return true;
+
+ return false;
+}
+
+// Internal function of trace_reg_uses.
+static void
+trace_reg_uses_1 (rtx reg, rtx start_insn, basic_block bb, int& count,
+ std::vector<basic_block>& visited_bb, rtx abort_at_insn)
+{
+ if (bb == NULL)
+ return;
+
+ if (std::find (visited_bb.begin (), visited_bb.end (), bb)
+ != visited_bb.end ())
+ log_return_void ("[bb %d] already visited\n", bb->index);
+
+ visited_bb.push_back (bb);
+
+ if (BB_END (bb) == NULL_RTX)
+ log_return_void ("[bb %d] BB_END is null\n", bb->index);
+
+ if (start_insn == NULL_RTX)
+ log_return_void ("[bb %d] start_insn is null\n", bb->index);
+
+ rtx end_insn = NEXT_INSN (BB_END (bb));
+ if (end_insn == NULL_RTX)
+ log_return_void ("[bb %d] end_insn is null\n", bb->index);
+
+ for (rtx i = NEXT_INSN (start_insn); i != end_insn; i = NEXT_INSN (i))
+ {
+ if (INSN_P (i))
+ {
+ if (NONDEBUG_INSN_P (i)
+ && (reg_overlap_mentioned_p (reg, PATTERN (i))
+ || (CALL_P (i) && find_reg_fusage (i, USE, reg))))
+ {
+ log_msg ("found use in [bb %d] at insn:\n", bb->index);
+ log_insn (i);
+ log_msg ("\n");
+ count += 1;
+ }
+
+ // Stop following this BB if the reg is set or dies along the way.
+ if (reg_set_p (reg, i) || reg_dead_after_insn (reg, i))
+ return;
+ }
+
+ if (abort_at_insn != NULL_RTX && abort_at_insn == i)
+ return;
+ }
+
+ for (edge_iterator ei = ei_start (bb->succs); !ei_end_p (ei); ei_next (&ei))
+ {
+ basic_block succ_bb = ei_edge (ei)->dest;
+ trace_reg_uses_1 (reg, BB_HEAD (succ_bb), succ_bb, count, visited_bb,
+ abort_at_insn);
+ }
+}
+
+// Trace uses of the specified reg in all basic blocks that are reachable from
+// the specified insn. If 'abort_at_insn' is not null, abort the trace at
+// that insn. If the insn 'abort_at_insn' uses the specified reg, it is also
+// counted.
+static int
+trace_reg_uses (rtx reg, rtx start_insn, rtx abort_at_insn)
+{
+ log_msg ("\ntrace_reg_uses\nreg = ");
+ log_rtx (reg);
+ log_msg ("\nstart_insn = ");
+ log_insn (start_insn);
+
+ int count = 0;
+ std::vector<basic_block> visited_bb;
+ visited_bb.reserve (32);
+
+ trace_reg_uses_1 (reg, start_insn, BLOCK_FOR_INSN (start_insn),
+ count, visited_bb, abort_at_insn);
+ return count;
+}
+
+// FIXME: Remove dependency on SH predicate function somehow.
+extern int t_reg_operand (rtx, machine_mode);
+extern int negt_reg_operand (rtx, machine_mode);
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// RTL pass class
+
+class sh_treg_combine : public rtl_opt_pass
+{
+public:
+ sh_treg_combine (gcc::context* ctx, bool split_insns, const char* name);
+ virtual ~sh_treg_combine (void);
+ virtual bool gate (void);
+ virtual unsigned int execute (void);
+
+private:
+ // Type of ccreg store that is supported.
+ enum cstore_type_t
+ {
+ cstore_normal = 0,
+ cstore_inverted = 1,
+ cstore_unknown = -1
+ };
+
+ // Type of branch condition that is supported.
+ enum branch_condition_type_t
+ {
+ branch_if_true = 1,
+ branch_if_false = 0,
+ unknown_branch_condition = -1
+ };
+
+ // For each basic block there can be a trace entry which consists of an
+ // insn that sets the ccreg (usually a comparison) and a ccreg store.
+ struct bb_entry
+ {
+ basic_block bb;
+ set_of_reg setcc;
+ set_of_reg cstore;
+ cstore_type_t cstore_type;
+ std::vector<set_of_reg> cstore_reg_reg_copies;
+
+ bb_entry (basic_block b)
+ : bb (b), setcc (), cstore (), cstore_type (cstore_unknown) { }
+
+ rtx comparison_rtx (void) const { return setcc.set_src (); }
+ };
+
+ // A ccreg trace for a conditional branch.
+ struct cbranch_trace
+ {
+ rtx cbranch_insn;
+ branch_condition_type_t cbranch_type;
+
+ // The comparison against zero right before the conditional branch.
+ set_of_reg setcc;
+
+ // All BBs that are related to the cbranch. The last BB in the list is
+ // the BB of the cbranch itself and might be empty.
+ std::list<bb_entry> bb_entries;
+
+ cbranch_trace (rtx insn)
+ : cbranch_insn (insn),
+ cbranch_type (unknown_branch_condition),
+ setcc ()
+ {
+ }
+
+ basic_block bb (void) const { return BLOCK_FOR_INSN (cbranch_insn); }
+
+ rtx
+ branch_condition_rtx (void) const
+ {
+ rtx x = pc_set (cbranch_insn);
+ return x == NULL_RTX ? NULL_RTX : XEXP (XEXP (x, 1), 0);
+ }
+
+ bool
+ can_invert_condition (void) const
+ {
+ // The branch condition can be inverted safely only if the condition
+ // reg is dead after the cbranch.
+ return reg_dead_after_insn (XEXP (branch_condition_rtx (), 0),
+ cbranch_insn);
+ }
+ };
+
+ static const pass_data default_pass_data;
+
+ // Tells whether modified or newly added insns are to be split at the end
+ // of the pass.
+ const bool m_split_insns;
+
+ // rtx of the ccreg that is obtained from the target.
+ rtx m_ccreg;
+
+ // Newly added or modified insns.
+ std::vector<rtx> m_touched_insns;
+
+ // Given an rtx determine whether it's a comparison with a constant zero.
+ static bool is_cmp_eq_zero (const_rtx i);
+
+ // Update the stored mode of the ccreg from the given branch condition rtx.
+ void update_ccreg_mode (const_rtx cond);
+
+ // Given an rtx, figure out the branch condition, assuming that it is
+ // in canonical form:
+ // (ne (reg) (const_int 0))
+ // (eq (reg) (const_int 0))
+ branch_condition_type_t branch_condition_type (const_rtx cond) const;
+
+ // Return true if the specified rtx is either a normal ccreg or
+ // a negated form of the ccreg.
+ bool is_normal_ccreg (const_rtx x) const;
+ bool is_inverted_ccreg (const_rtx x) const;
+
+ // Given a reg rtx and a start insn rtx, try to find the insn in the same
+ // basic block that sets the specified reg.
+ // Return how the search ended and the insn where it stopped or NULL_RTX.
+ enum record_return_t
+ {
+ set_found,
+ set_not_found,
+ other_set_found
+ };
+ record_return_t record_set_of_reg (rtx reg, rtx start_insn, bb_entry& e);
+
+ // Tells whether the cbranch insn of the specified bb_entry can be removed
+ // safely without triggering any side effects.
+ bool can_remove_cstore (const bb_entry& e,
+ const cbranch_trace& trace) const;
+
+ // Tells whether the setcc insn of the specified bb_entry can be removed
+ // safely without triggering any side effects.
+ bool can_remove_comparison (const bb_entry& e,
+ const cbranch_trace& trace) const;
+
+ // Tells whether the two specified comparison rtx can be combined into a
+ // single comparison.
+ bool can_combine_comparisons (const_rtx x, const_rtx y) const;
+
+ // Tells whether the ccreg usage can be extended from the bb_entry on until
+ // the final cbranch of the trace.
+ bool can_extend_ccreg_usage (const bb_entry& e,
+ const cbranch_trace& trace) const;
+
+ // Create an insn rtx that is a negating reg move (not operation).
+ rtx make_not_reg_insn (rtx dst_reg, rtx src_reg) const;
+
+ // Create an insn rtx that inverts the ccreg.
+ rtx make_inv_ccreg_insn (void) const;
+
+ // Adds the specified insn to the set of modified or newly added insns that
+ // might need splitting at the end of the pass.
+ rtx touched_insn (rtx i);
+
+ // Try to invert the branch condition of the specified trace.
+ bool try_invert_branch_condition (cbranch_trace& trace);
+
+ // Try to optimize a cbranch trace by combining comparisons in BBs and
+ // eliminate the cstores.
+ bool try_combine_comparisons (cbranch_trace& trace,
+ int cstore_count, int inv_cstore_count,
+ cstore_type_t dominating_cstore);
+
+ // Try to optimize a cbranch trace by eliminating the cstores in BBs only.
+ bool try_eliminate_cstores (cbranch_trace& trace,
+ int cstore_count, int inv_cstore_count,
+ cstore_type_t dominating_cstore);
+
+ // Given a branch insn, try to optimize its branch condition.
+ // If any insns are modified or added they are added to 'm_touched_insns'.
+ void try_optimize_cbranch (rtx i);
+};
+
+
+const pass_data sh_treg_combine::default_pass_data =
+{
+ RTL_PASS, // type
+ "", // name (overwritten by the constructor)
+ OPTGROUP_NONE, // optinfo_flags
+ true, // has_gate
+ true, // has_execute
+ TV_OPTIMIZE, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ TODO_df_finish | TODO_df_verify // todo_flags_finish
+ | TODO_verify_rtl_sharing
+};
+
+sh_treg_combine::sh_treg_combine (gcc::context* ctx, bool split_insns,
+ const char* name)
+: rtl_opt_pass (default_pass_data, ctx),
+ m_split_insns (split_insns),
+ m_ccreg (NULL_RTX)
+{
+ // Overwrite default name in pass_data base class.
+ this->name = name;
+}
+
+sh_treg_combine::~sh_treg_combine (void)
+{
+}
+
+void sh_treg_combine::update_ccreg_mode (const_rtx cond)
+{
+ if (REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) != REGNO (m_ccreg))
+ return;
+
+ machine_mode m = GET_MODE (XEXP (cond, 0));
+ if (m == GET_MODE (m_ccreg))
+ return;
+
+ PUT_MODE (m_ccreg, m);
+ log_msg ("updated ccreg mode: ");
+ log_rtx (m_ccreg);
+ log_msg ("\n");
+}
+
+bool
+sh_treg_combine::is_cmp_eq_zero (const_rtx i)
+{
+ return i != NULL_RTX && GET_CODE (i) == EQ
+ && REG_P (XEXP (i, 0)) && XEXP (i, 1) == const0_rtx;
+}
+
+sh_treg_combine::branch_condition_type_t
+sh_treg_combine::branch_condition_type (const_rtx cond) const
+{
+ if (cond == NULL_RTX)
+ return unknown_branch_condition;
+
+ if (GET_CODE (cond) == NE
+ && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (m_ccreg)
+ && XEXP (cond, 1) == const0_rtx)
+ return branch_if_true;
+
+ else if (GET_CODE (cond) == EQ
+ && REG_P (XEXP (cond, 0)) && REGNO (XEXP (cond, 0)) == REGNO (m_ccreg)
+ && XEXP (cond, 1) == const0_rtx)
+ return branch_if_false;
+
+ else
+ return unknown_branch_condition;
+}
+
+bool
+sh_treg_combine::is_normal_ccreg (const_rtx x) const
+{
+ return t_reg_operand (const_cast<rtx> (x), VOIDmode);
+}
+
+bool
+sh_treg_combine::is_inverted_ccreg (const_rtx x) const
+{
+ return negt_reg_operand (const_cast<rtx> (x), VOIDmode);
+}
+
+sh_treg_combine::record_return_t
+sh_treg_combine::record_set_of_reg (rtx reg, rtx start_insn,
+ bb_entry& new_entry)
+{
+ log_msg ("\n[bb %d]\n", new_entry.bb->index);
+
+ if (start_insn == NULL_RTX)
+ log_return (set_not_found, "set of reg not found. empty BB?\n");
+
+ new_entry.cstore_type = cstore_unknown;
+
+ for (rtx i = start_insn; i != NULL_RTX; )
+ {
+ new_entry.cstore = find_set_of_reg_bb (reg, i);
+
+ if (new_entry.cstore.set_src () == NULL_RTX)
+ log_return (set_not_found, "set of reg not found (cstore)\n");
+
+ log_insn (new_entry.cstore.insn);
+ log_msg ("\n");
+
+ if (is_normal_ccreg (new_entry.cstore.set_src ()))
+ {
+ log_msg ("normal condition store\n");
+ new_entry.cstore_type = cstore_normal;
+ }
+ else if (is_inverted_ccreg (new_entry.cstore.set_src ()))
+ {
+ log_msg ("inverted condition store\n");
+ new_entry.cstore_type = cstore_inverted;
+ }
+ else if (REG_P (new_entry.cstore.set_src ()))
+ {
+ // If it's a reg-reg copy follow the copied reg.
+ new_entry.cstore_reg_reg_copies.push_back (new_entry.cstore);
+ reg = new_entry.cstore.set_src ();
+ i = new_entry.cstore.insn;
+
+ log_msg ("reg-reg copy. tracing ");
+ log_rtx (reg);
+ log_msg ("\n");
+ continue;
+ }
+ else
+ log_return (other_set_found, "not a condition store\n");
+
+ gcc_assert (new_entry.cstore_type != cstore_unknown);
+
+ // Now see how the ccreg was set.
+ // For now it must be in the same BB.
+ log_msg ("tracing ccreg\n");
+ new_entry.setcc =
+ find_set_of_reg_bb (m_ccreg,
+ prev_nonnote_insn_bb (new_entry.cstore.insn));
+
+ // If cstore was found but setcc was not found continue anyway, as
+ // for some of the optimization types the setcc is irrelevant.
+ if (new_entry.setcc.set_src () == NULL_RTX)
+ log_return (set_found, "set of ccreg not found\n");
+
+ else if (GET_CODE (new_entry.setcc.set_rtx) == SET)
+ {
+ // Also allow insns that set the ccreg, but are not true comparison
+ // insns, as long as they are sets and not e.g. clobbers.
+ log_insn (new_entry.setcc.insn);
+ log_msg ("\n");
+ return set_found;
+ }
+ else
+ // If cstore was found but setcc was not found continue anyway, as
+ // for some of the optimization types the setcc is irrelevant.
+ log_return (set_found, "unknown set of ccreg\n");
+ }
+
+ log_return (set_not_found, "set of reg not found\n");
+}
+
+bool
+sh_treg_combine::can_remove_cstore (const bb_entry& e,
+ const cbranch_trace& trace) const
+{
+ if (volatile_insn_p (PATTERN (e.cstore.insn)))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause it's volatile\n");
+ }
+
+ // On SH there are parallel patterns which store the ccreg multiple times.
+ // In this case it's not safe.
+ rtx cstore_pat = PATTERN (e.cstore.insn);
+ if (GET_CODE (cstore_pat) == PARALLEL)
+ for (int i = 0; i < XVECLEN (cstore_pat, 0); ++i)
+ {
+ rtx x = XVECEXP (cstore_pat, 0, i);
+
+ // It's the cstore set that we're referring to, ignore that one.
+ if (x != e.cstore.set_rtx
+ && GET_CODE (x) == SET && reg_referenced_p (m_ccreg, x))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause it's a multiple ccreg store\n");
+ }
+ }
+
+ // If the cstore sets the ccreg (e.g. negc) and the ccreg is used afterwards
+ // it's not safe.
+ if (modified_in_p (m_ccreg, e.cstore.insn)
+ && !(reg_dead_after_insn (m_ccreg, e.cstore.insn)
+ || reg_unused_after_insn (m_ccreg, e.cstore.insn)))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause it sets the ccreg\n");
+ }
+
+ // If the cstore destination reg is copied around check the reg-reg
+ // copies. At every reg-reg copy the copied reg must be dead and there
+ // must not be a usage of the copied regs between the reg-reg copies.
+ // Otherwise we assume that the result of the cstore is used in some
+ // other way.
+ rtx prev_insn = e.cstore.insn;
+ for (std::vector<set_of_reg>::const_reverse_iterator i =
+ e.cstore_reg_reg_copies.rbegin ();
+ i != e.cstore_reg_reg_copies.rend (); ++i)
+ {
+ if (!reg_dead_after_insn (i->set_src (), i->insn))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (i->insn);
+ log_return (false, "\nbecause source of reg-reg copy doesn't die\n");
+ }
+
+ if (reg_used_between_p (i->set_src (), prev_insn, i->insn))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (i->insn);
+ log_return (false, "\nbecause reg %d is otherwise used\n",
+ REGNO (i->set_src ()));
+ }
+
+ prev_insn = i->insn;
+ }
+
+ // The cstore_dst reg must die after the test before the cbranch, otherwise
+ // it's not safe to remove the cstore.
+ // If the cstore destination reg is copied around check the effective
+ // destination reg of the cstore. The reg-reg copies are recorded in
+ // reverse order, i.e. the most recent reg-reg copy in the insn list
+ // comes first.
+ rtx cstore_dst = e.cstore_reg_reg_copies.empty ()
+ ? e.cstore.set_dst ()
+ : e.cstore_reg_reg_copies.front ().set_dst ();
+
+ if (!reg_dead_after_insn (cstore_dst, trace.setcc.insn))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause its effective target reg %d doesn't die "
+ "after trace.setcc.insn\n", REGNO (cstore_dst));
+ }
+
+ // Also check that the cstore_dst reg is not used in other reachable code
+ // paths before it dies.
+ // Count the uses of the effective cstore_dst reg (i.e. the last known reg
+ // that holds the cstore value after reg-reg copies) in all BBs that can be
+ // reached from bb_entry's BB including the BB of the cstore insn.
+ // If we get more than 1 use we assume that it's used somewhere else and is
+ // not safe to be removed.
+ int cstore_dst_use_count = trace_reg_uses (cstore_dst, e.cstore.insn,
+ trace.setcc.insn);
+ if (cstore_dst_use_count > 1)
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause its effective target reg %d is used "
+ "in %d other places\n", REGNO (cstore_dst),
+ cstore_dst_use_count - 1);
+ }
+
+ return true;
+}
+
+bool
+sh_treg_combine::can_remove_comparison (const bb_entry& e,
+ const cbranch_trace&/* trace*/) const
+{
+ // If the ccreg is used otherwise between the comparison and the cstore,
+ // it's not safe.
+ if (reg_used_between_p (m_ccreg, e.setcc.insn, e.cstore.insn))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.setcc.insn);
+ log_return (false, "\nbecause the ccreg is used otherwise\n");
+ }
+
+ if (!reg_dead_after_insn (m_ccreg, e.cstore.insn)
+ && !reg_unused_after_insn (m_ccreg, e.cstore.insn))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause ccreg is not dead or unused afterwards\n");
+ }
+
+ // On SH there are also multiple set patterns that can be used for
+ // comparisons, such as "shll". It's not safe to remove those.
+ if (multiple_sets (e.setcc.insn))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (e.cstore.insn);
+ log_return (false, "\nbecause it's a multiple set\n");
+ }
+
+ return true;
+}
+
+rtx
+sh_treg_combine::make_not_reg_insn (rtx dst_reg, rtx src_reg) const
+{
+ // This will go through expanders and may output multiple insns
+ // for multi-word regs.
+ start_sequence ();
+ expand_simple_unop (GET_MODE (dst_reg), NOT, src_reg, dst_reg, 0);
+ rtx i = get_insns ();
+ end_sequence ();
+ return i;
+}
+
+rtx
+sh_treg_combine::make_inv_ccreg_insn (void) const
+{
+ start_sequence ();
+ rtx i = emit_insn (gen_rtx_SET (VOIDmode, m_ccreg,
+ gen_rtx_fmt_ee (XOR, GET_MODE (m_ccreg),
+ m_ccreg, const1_rtx)));
+ end_sequence ();
+ return i;
+}
+
+rtx
+sh_treg_combine::touched_insn (rtx i)
+{
+ m_touched_insns.push_back (i);
+ return i;
+}
+
+bool
+sh_treg_combine::can_combine_comparisons (const_rtx x, const_rtx y) const
+{
+ if (GET_CODE (x) != GET_CODE (y))
+ return false;
+
+ rtx x_op0 = XEXP (x, 0);
+ rtx x_op1 = XEXP (x, 1);
+
+ rtx y_op0 = XEXP (y, 0);
+ rtx y_op1 = XEXP (y, 1);
+
+ if (!REG_P (x_op0) || !REG_P (y_op0))
+ return false;
+
+ if (GET_MODE (x_op0) != GET_MODE (y_op0))
+ return false;
+
+ // rtx_equal_p also compares the reg numbers which we do not care about
+ // here, as long as both are regs and the modes are the same.
+ if (REG_P (x_op1))
+ return REG_P (y_op1) && GET_MODE (x_op1) == GET_MODE (y_op1);
+
+ return rtx_equal_p (x_op1, y_op1);
+}
+
+bool
+sh_treg_combine::can_extend_ccreg_usage (const bb_entry& e,
+ const cbranch_trace& trace) const
+{
+ // Check that the ccreg is not modified by other insns in the BB path until
+ // the final cbranch of the trace.
+ // Start checking after the cstore that follows the setcc, assuming that
+ // the cstore will be removed.
+
+ // The assumption here is that the specified bb_entry's BB is a direct
+ // predecessor of the trace.cbranch_insn's BB.
+ if (e.bb != trace.bb () && !is_adjacent_bb (e.bb, trace.bb ()))
+ log_return (false,
+ "can't extend ccreg usage -- [bb %d] and [bb %d] are not adjacent\n",
+ e.bb->index, trace.bb ()->index);
+
+ if (e.cstore.empty ())
+ log_return (false, "can't extend ccreg usage -- no cstore\n");
+
+ // The entry's cstore is in the same BB as the final cbranch.
+ if (e.bb == trace.bb ())
+ {
+ if (reg_set_between_p (m_ccreg, e.cstore.insn, trace.setcc.insn))
+ log_return (false,
+ "can't extend ccreg usage -- it's modified between e.cstore.insn "
+ "and trace.setcc.insn");
+ else
+ return true;
+ }
+
+ // The entry's cstore and the final cbranch are in different BBs.
+ if (reg_set_between_p (m_ccreg, e.cstore.insn, NEXT_INSN (BB_END (e.bb))))
+ log_return (false,
+ "can't extend ccreg usage -- it's modified in [bb %d]", e.bb->index);
+
+ if (reg_set_between_p (m_ccreg, PREV_INSN (BB_HEAD (trace.bb ())),
+ trace.setcc.insn))
+ log_return (false,
+ "can't extend ccreg usage -- it's modified in [bb %d]",
+ trace.bb ()->index);
+
+ return true;
+}
+
+bool
+sh_treg_combine::try_invert_branch_condition (cbranch_trace& trace)
+{
+ log_msg ("inverting branch condition\n");
+
+ if (!invert_jump_1 (trace.cbranch_insn, JUMP_LABEL (trace.cbranch_insn)))
+ log_return (false, "invert_jump_1 failed\n");
+
+ if (verify_changes (num_validated_changes ()))
+ confirm_change_group ();
+ else
+ log_return (false, "verify_changes failed\n");
+
+ touched_insn (trace.cbranch_insn);
+ return true;
+}
+
+bool
+sh_treg_combine::try_combine_comparisons (cbranch_trace& trace,
+ int cstore_count,
+ int inv_cstore_count,
+ cstore_type_t dominating_cstore)
+{
+ log_msg ("\ntry_combine_comparisons\n");
+
+ // This function will always try to create new pseudos.
+ if (!can_create_pseudo_p ())
+ log_return (false, "can't create pseudos\n");
+
+ // Check that all ccset insns are comparisons and all comparison types in
+ // all BBs are the same and could be combined into one single comparison.
+ rtx comp = NULL_RTX;
+ rtx comp_insn = NULL_RTX;
+
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ int i_empty_count = i->setcc.empty () + i->cstore.empty ();
+
+ // A completely empty entry is OK (could be the BB of the cbranch).
+ if (i_empty_count == 2)
+ continue;
+
+ // Otherwise we need both, the setcc and the cstore.
+ if (i_empty_count != 0)
+ log_return (false, "bb entry is not a setcc cstore pair\n");
+
+ rtx other_comp = i->comparison_rtx ();
+
+ if (!COMPARISON_P (other_comp))
+ {
+ log_msg ("setcc is not a comparison:\n");
+ log_rtx (other_comp);
+ log_return (false, "\n");
+ }
+
+ if (comp_insn == NULL_RTX)
+ {
+ comp = other_comp;
+ comp_insn = i->setcc.insn;
+ }
+ else if (!can_combine_comparisons (comp, other_comp))
+ return false;
+
+ // The goal here is to eliminate all cstores and comparisons in the BBs.
+ // Thus check if every cstore can actually be removed safely.
+ if (!can_remove_cstore (*i, trace) || !can_remove_comparison (*i, trace))
+ return false;
+ }
+
+ // FIXME: The first operand of the comparison must be a simple reg.
+ // This effectively prohibits combining div0s comparisons such as
+ // (lt:SI (xor:SI (reg:SI) (reg:SI)))
+ if (!REG_P (XEXP (comp, 0)))
+ {
+ log_msg ("comparison operand 0\n");
+ log_rtx (XEXP (comp, 0));
+ log_return (false, "\nis not a reg\n");
+ }
+
+ rtx comp_op0 = gen_reg_rtx (GET_MODE (XEXP (comp, 0)));
+ rtx comp_op1 = REG_P (XEXP (comp, 1))
+ ? gen_reg_rtx (GET_MODE (XEXP (comp, 1)))
+ : XEXP (comp, 1);
+
+ // If there are both inverting and non-inverting cstores, they can only
+ // be eliminated if the comparison can be inverted. We assume that the
+ // comparison insns that we find are already minimal and canonicalized.
+ // There is one special case though, where an integer comparison
+ // (eq (reg) (const_int 0))
+ // can be inverted with a sequence
+ // (eq (not (reg)) (const_int 0))
+ if (inv_cstore_count != 0 && cstore_count != 0)
+ {
+ if (make_not_reg_insn (comp_op0, comp_op0) == NULL_RTX)
+ log_return (false, "make_not_reg_insn failed.\n");
+
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ if (i->setcc.empty () || i->cstore.empty ())
+ continue;
+
+ if (i->cstore_type != dominating_cstore
+ && !is_cmp_eq_zero (i->comparison_rtx ()))
+ {
+ log_msg ("can't invert comparison in insn\n");
+ log_insn (i->setcc.insn);
+ log_return (false,
+ "\nbecause it's not a (eq (reg) (const_int 0))\n");
+ }
+ }
+ }
+
+ if (dominating_cstore == cstore_normal
+ && !try_invert_branch_condition (trace))
+ return false;
+
+ // Replace the test insn before the cbranch with the common comparison.
+ // Instead of creating a new insn from scratch we copy the common comparison
+ // pattern. This simplifies handling parallel comparison patterns, such as
+ // FP comparisons on SH, which have an extra use on FPSCR.
+ log_msg ("installing common comparison in [bb %d]\n", trace.bb ()->index);
+
+ rtx common_comp_pat = copy_rtx (PATTERN (comp_insn));
+ rtx common_comp = const_cast<rtx> (set_of (m_ccreg, common_comp_pat));
+
+ gcc_assert (common_comp != NULL_RTX);
+
+ XEXP (XEXP (common_comp, 1), 0) = comp_op0;
+ XEXP (XEXP (common_comp, 1), 1) = comp_op1;
+
+ log_rtx (common_comp_pat);
+ log_msg ("\n");
+
+ rtx common_comp_insn = touched_insn (emit_insn_after (common_comp_pat,
+ trace.setcc.insn));
+
+ if (REG_P (comp_op0))
+ add_reg_note (common_comp_insn, REG_DEAD, copy_rtx (comp_op0));
+ if (REG_P (comp_op1))
+ add_reg_note (common_comp_insn, REG_DEAD, copy_rtx (comp_op1));
+
+ delete_insn (trace.setcc.insn);
+
+ // Replace comparison and cstore insns with reg-reg moves in all BBs.
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ if (i->setcc.empty () || i->cstore.empty ())
+ continue;
+
+ rtx i_comp_op0 = XEXP (i->comparison_rtx (), 0);
+ rtx i_comp_op1 = XEXP (i->comparison_rtx (), 1);
+
+ if (i->cstore_type == dominating_cstore)
+ {
+ log_msg ("replacing comparison and cstore with reg move "
+ "in [bb %d]\n", i->bb->index);
+
+ rtx new_i = touched_insn (
+ emit_insn_after (gen_move_insn (comp_op0, i_comp_op0),
+ i->setcc.insn));
+
+ if (REG_P (i_comp_op0)
+ && reg_dead_after_insn (i_comp_op0, i->setcc.insn))
+ add_reg_note (new_i, REG_DEAD, copy_rtx (i_comp_op0));
+
+ // If the second operand is a reg, we have to emit a move insn.
+ // Otherwise assume it's a const_int and just reference it.
+ if (REG_P (comp_op1))
+ {
+ new_i = touched_insn (
+ emit_insn_after (gen_move_insn (comp_op1, i_comp_op1),
+ i->setcc.insn));
+
+ if (reg_dead_after_insn (i_comp_op1, i->setcc.insn))
+ add_reg_note (new_i, REG_DEAD, copy_rtx (i_comp_op1));
+ }
+ }
+ else
+ {
+ log_msg ("replacing comparison and cstore with inverting reg move "
+ "in [bb %d]\n", i->bb->index);
+
+ rtx new_i = make_not_reg_insn (comp_op0, i_comp_op0);
+ if (REG_P (i_comp_op0)
+ && reg_dead_after_insn (i_comp_op0, i->setcc.insn))
+ add_reg_note (new_i, REG_DEAD, copy_rtx (i_comp_op0));
+
+ touched_insn (emit_insn_after (new_i, i->setcc.insn));
+ }
+
+ delete_insn (i->cstore.insn);
+ delete_insn (i->setcc.insn);
+ }
+
+ return true;
+}
+
+bool
+sh_treg_combine::try_eliminate_cstores (cbranch_trace& trace,
+ int cstore_count, int inv_cstore_count,
+ cstore_type_t dominating_cstore)
+{
+ log_msg ("\ntry_eliminate_cstores\n");
+
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ // A completely empty entry is OK (could be the BB of the cbranch).
+ if (i->setcc.empty () && i->cstore.empty ())
+ continue;
+
+ // We're going to eliminate cstores, but for that they have to be
+ // there. We don't care about the setcc in this case.
+ if (i->cstore.empty ())
+ log_return (false, "bb entry cstore empty -- aborting\n");
+
+ // The goal here is to eliminate all cstores in the BBs and extend the
+ // ccreg usage.
+ if (!can_extend_ccreg_usage (*i, trace))
+ return false;
+
+ // If the cstore can't be removed we can keep it around as long as
+ // it doesn't modify the ccreg.
+ if (!can_remove_cstore (*i, trace)
+ && modified_in_p (m_ccreg, i->cstore.insn))
+ log_return (false, "cstore sets ccreg -- aborting\n");
+ }
+
+ // If there are both inverting and non-inverting cstores, we'll have to
+ // invert the ccreg as a replacement for one of them.
+ if (cstore_count != 0 && inv_cstore_count != 0)
+ {
+ rtx i = make_inv_ccreg_insn ();
+ if (recog_memoized (i) < 0)
+ {
+ log_msg ("failed to match ccreg inversion insn:\n");
+ log_rtx (PATTERN (i));
+ log_return (false, "\naborting\n");
+ }
+ }
+
+ if (dominating_cstore == cstore_normal
+ && !try_invert_branch_condition (trace))
+ return false;
+
+ // Eliminate cstores in all BBs.
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ if (i->cstore.empty ())
+ continue;
+
+ if (i->cstore_type == dominating_cstore)
+ log_msg ("removing cstore in [bb %d]\n", i->bb->index);
+ else
+ {
+ log_msg ("replacing cstore with ccreg inversion in [bb %d]\n",
+ i->bb->index);
+
+ touched_insn (
+ emit_insn_after (make_inv_ccreg_insn (), i->cstore.insn));
+ }
+
+ if (can_remove_cstore (*i, trace))
+ delete_insn (i->cstore.insn);
+ }
+
+ log_msg ("removing test insn before cbranch\n");
+ delete_insn (trace.setcc.insn);
+ return true;
+}
+
+void
+sh_treg_combine::try_optimize_cbranch (rtx insn)
+{
+ cbranch_trace trace (insn);
+
+ log_msg ("\n\n--------------------------------------\n");
+ log_msg ("found cbranch insn in [bb %d]:\n", trace.bb ()->index);
+ log_insn (insn);
+
+ trace.cbranch_type = branch_condition_type (trace.branch_condition_rtx ());
+
+ if (trace.cbranch_type == branch_if_true)
+ log_msg ("condition: branch if true\n");
+ else if (trace.cbranch_type == branch_if_false)
+ log_msg ("condition: branch if false\n");
+ else
+ {
+ log_msg ("unknown branch condition\n");
+ log_rtx (trace.branch_condition_rtx ());
+ log_return_void ("\n");
+ }
+
+ update_ccreg_mode (trace.branch_condition_rtx ());
+
+ // Scan the insns backwards for an insn that sets the ccreg by testing a
+ // reg against zero like
+ // (set (reg ccreg) (eq (reg) (const_int 0)))
+ // The testing insn could also be outside of the current basic block, but
+ // for now we limit the search to the current basic block.
+ trace.setcc = find_set_of_reg_bb (m_ccreg, prev_nonnote_insn_bb (insn));
+
+ if (!is_cmp_eq_zero (trace.setcc.set_src ()))
+ log_return_void ("could not find set of ccreg in current BB\n");
+
+ rtx trace_reg = XEXP (trace.setcc.set_src (), 0);
+
+ log_msg ("set of ccreg:\n");
+ log_insn (trace.setcc.insn);
+
+ // See if we can remove the trace.setcc insn safely.
+ if (reg_used_between_p (m_ccreg, trace.setcc.insn, trace.cbranch_insn))
+ log_return_void ("ccreg used between testing insn and branch insn\n");
+
+ if (volatile_insn_p (PATTERN (trace.setcc.insn)))
+ {
+ log_msg ("can't remove insn\n");
+ log_insn (trace.setcc.insn);
+ log_return_void ("\nbecause it's volatile\n");
+ }
+
+ // Now that we have an insn which tests some reg and sets the condition
+ // reg before the conditional branch, try to figure out how that tested
+ // reg was formed, i.e. find all the insns that set the tested reg in
+ // some way.
+ // The tested reg might be set in multiple basic blocks so we need to
+ // check all basic blocks which can reach this current basic block.
+ // If the set of reg is an inverting or non-inverting store of the condition
+ // register, check how the ccreg value was obtained.
+ log_msg ("\ntracing ");
+ log_rtx (trace_reg);
+ log_msg ("\n");
+
+
+ // First check the basic block where the conditional branch is in.
+ // If we find it here there's no point in checking other BBs.
+ trace.bb_entries.push_front (bb_entry (trace.bb ()));
+
+ record_return_t res =
+ record_set_of_reg (trace_reg, prev_nonnote_insn_bb (trace.setcc.insn),
+ trace.bb_entries.front ());
+
+ if (res == other_set_found)
+ log_return_void ("other set found - aborting trace\n");
+ else if (res == set_not_found)
+ {
+ // It seems the initial search in the BB of the conditional branch
+ // didn't find anything. Now look in all predecessor BBs.
+ for (edge_iterator ei = ei_start (trace.bb ()->preds);
+ !ei_end_p (ei); ei_next (&ei))
+ {
+ edge e = ei_edge (ei);
+ trace.bb_entries.push_front (bb_entry (e->src));
+
+ res = record_set_of_reg (trace_reg, BB_END (e->src),
+ trace.bb_entries.front ());
+ if (res != set_found)
+ log_return_void ("set not found - aborting trace\n");
+ }
+ }
+
+ if (dump_file != NULL)
+ {
+ log_msg ("\ncbranch trace summary:\n");
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ log_msg ("\n[bb %d]\n", i->bb->index);
+ if (!i->setcc.empty ())
+ {
+ log_rtx (i->setcc.set_rtx);
+ log_msg ("\n");
+ }
+ if (!i->cstore.empty ())
+ {
+ log_rtx (i->cstore.set_rtx);
+ log_msg ("\n");
+ }
+
+ for (std::vector<set_of_reg>::const_reverse_iterator j =
+ i->cstore_reg_reg_copies.rbegin ();
+ j != i->cstore_reg_reg_copies.rend (); ++j)
+ {
+ log_rtx (j->set_rtx);
+ log_msg ("\n");
+ }
+ }
+
+ log_rtx (trace.setcc.set_rtx);
+ log_msg ("\n");
+ log_rtx (PATTERN (trace.cbranch_insn));
+ log_msg ("\n");
+ }
+
+ // Check that we don't have any empty BBs.
+ // Only the BB with the cbranch may be empty.
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ if (i->setcc.empty () && i->cstore.empty () && i->bb != trace.bb ())
+ log_return_void ("\n[bb %d] is empty - aborting.\n", i->bb->index);
+
+ // Determine the dominating cstore type
+ // FIXME: Try to take the probabilities of the BBs into account somehow.
+ int cstore_count = 0;
+ int inv_cstore_count = 0;
+
+ for (std::list<bb_entry>::const_iterator i = trace.bb_entries.begin ();
+ i != trace.bb_entries.end (); ++i)
+ {
+ if (i->cstore_type == cstore_normal)
+ cstore_count += 1;
+ else if (i->cstore_type == cstore_inverted)
+ inv_cstore_count += 1;
+ }
+
+ log_msg ("cstore count = %d inverted cstore count = %d\n",
+ cstore_count, inv_cstore_count);
+
+ // This puts a priority on inverting cstores.
+ cstore_type_t dominating_cstore = inv_cstore_count >= cstore_count
+ ? cstore_inverted
+ : cstore_normal;
+
+ if (dominating_cstore == cstore_inverted)
+ log_msg ("will try to eliminate inverted cstore\n");
+ else if (dominating_cstore == cstore_normal)
+ {
+ log_msg ("will try to eliminate normal cstore\n");
+ if (!trace.can_invert_condition ())
+ log_return_void ("branch condition can't be inverted - aborting\n");
+ }
+ else
+ gcc_unreachable ();
+
+ if (try_combine_comparisons (trace, cstore_count, inv_cstore_count,
+ dominating_cstore))
+ return;
+
+ try_eliminate_cstores (trace, cstore_count, inv_cstore_count,
+ dominating_cstore);
+}
+
+bool
+sh_treg_combine::gate (void)
+{
+ return optimize > 0;
+}
+
+unsigned int
+sh_treg_combine::execute (void)
+{
+ unsigned int ccr0 = INVALID_REGNUM;
+ unsigned int ccr1 = INVALID_REGNUM;
+
+ if (targetm.fixed_condition_code_regs (&ccr0, &ccr1)
+ && ccr0 != INVALID_REGNUM)
+ {
+ // Initially create a reg rtx with VOIDmode.
+ // When the first conditional branch is discovered, the mode is changed
+ // to the mode that is actually used by the target.
+ m_ccreg = gen_rtx_REG (VOIDmode, ccr0);
+ }
+
+ if (m_ccreg == NULL_RTX)
+ log_return (0, "no ccreg.\n\n");
+
+ if (STORE_FLAG_VALUE != 1)
+ log_return (0, "unsupported STORE_FLAG_VALUE %d", STORE_FLAG_VALUE);
+
+ log_msg ("ccreg: ");
+ log_rtx (m_ccreg);
+ log_msg (" STORE_FLAG_VALUE = %d\n", STORE_FLAG_VALUE);
+
+ // Look for basic blocks that end with a conditional branch and try to
+ // optimize them.
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ rtx i = BB_END (bb);
+ if (any_condjump_p (i) && onlyjump_p (i))
+ try_optimize_cbranch (i);
+ }
+
+ log_msg ("\n\n");
+
+ // If new insns are created and this pass is executed after all insns
+ // have been split already, we must split the insns we've changed or added
+ // ourselves here.
+ // FIXME: Multi-word operations (which emit multiple insns) are not handled
+ // properly here, since only one insn will end up in 'm_touched_insns'.
+ // On SH this is not a problem though.
+ if (m_split_insns)
+ for (std::vector<rtx>::const_iterator i = m_touched_insns.begin ();
+ i != m_touched_insns.end (); ++i)
+ {
+ log_msg ("trying to split insn:\n");
+ log_insn (*i);
+ log_msg ("\n");
+ try_split (PATTERN (*i), *i, 0);
+ }
+
+ m_touched_insns.clear ();
+ log_return (0, "\n\n");
+}
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// This allows instantiating the pass somewhere else without having to pull
+// in a header file.
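+// For example, the backend's pass registration code can wire it up with
+// something like the following (sketch only; the actual registration for
+// SH lives in sh.c):
+//   register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
+//                  PASS_POS_INSERT_AFTER, "ce1", 1);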
+opt_pass*
+make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
+ const char* name)
+{
+ return new sh_treg_combine (ctx, split_insns, name);
+}
diff --git a/gcc-4.9/gcc/config/sh/shmedia.h b/gcc-4.9/gcc/config/sh/shmedia.h
new file mode 100644
index 000000000..3df996274
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/shmedia.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _SHMEDIA_H
+#define _SHMEDIA_H
+
+#include <ushmedia.h>
+#include <sshmedia.h>
+
+#endif
diff --git a/gcc-4.9/gcc/config/sh/shmedia.md b/gcc-4.9/gcc/config/sh/shmedia.md
new file mode 100644
index 000000000..9ca829d2d
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/shmedia.md
@@ -0,0 +1,94 @@
+;; DFA scheduling description for SH-5 SHmedia instructions.
+;; Copyright (C) 2004-2014 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; This is just a conversion of the old model using define_function_unit.
+
+;; When executing SHmedia code, the SH-5 is a fairly straightforward
+;; single-issue machine. It has four pipelines, the branch unit (br),
+;; the integer and multimedia unit (imu), the load/store unit (lsu), and
+;; the floating point unit (fpu).
+
+(define_automaton "sh5inst_pipe, sh5fpu_pipe")
+
+(define_cpu_unit "sh5issue" "sh5inst_pipe")
+
+(define_cpu_unit "sh5fds" "sh5fpu_pipe")
+
+;; Every instruction on SH-5 occupies the issue resource for at least one
+;; cycle.
+(define_insn_reservation "shmedia1" 1
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "!pt_media,ptabs_media,invalidate_line_media,dmpy_media,load_media,fload_media,fcmp_media,fmove_media,fparith_media,dfparith_media,fpconv_media,dfpconv_media,dfmul_media,store_media,fstore_media,mcmp_media,mac_media,d2mpy_media,atrans_media,ustore_media"))
+ "sh5issue")
+
+;; Specify the various types of instruction which have latency > 1
+(define_insn_reservation "shmedia2" 2
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "mcmp_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia3" 3
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "dmpy_media,load_media,fcmp_media,mac_media"))
+ "sh5issue")
+;; but see sh_adjust_cost for mac_media exception.
+
+(define_insn_reservation "shmedia4" 4
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "fload_media,fmove_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia_d2mpy" 4
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "d2mpy_media"))
+ "sh5issue*2")
+
+(define_insn_reservation "shmedia5" 5
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "pt_media,ptabs_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia6" 6
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "fparith_media,dfparith_media,fpconv_media,dfpconv_media"))
+ "sh5issue")
+
+(define_insn_reservation "shmedia_invalidate" 7
+ (and (eq_attr "pipe_model" "sh5media")
+ (eq_attr "type" "invalidate_line_media"))
+ "sh5issue*7")
+
+(define_insn_reservation "shmedia_dfmul" 9
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfmul_media"))
+ "sh5issue*4")
+
+(define_insn_reservation "shmedia_atrans" 10
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "atrans_media"))
+ "sh5issue*5")
+
+;; Floating-point divide and square-root occupy an additional resource,
+;; which is not internally pipelined. However, other instructions
+;; can continue to issue.
+(define_insn_reservation "shmedia_fdiv" 19
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "fdiv_media"))
+ "sh5issue+sh5fds,sh5fds*18")
+
+(define_insn_reservation "shmedia_dfdiv" 35
+ (and (eq_attr "pipe_model" "sh5media") (eq_attr "type" "dfdiv_media"))
+ "sh5issue+sh5fds,sh5fds*34")
diff --git a/gcc-4.9/gcc/config/sh/sshmedia.h b/gcc-4.9/gcc/config/sh/sshmedia.h
new file mode 100644
index 000000000..5cf164733
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sshmedia.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* sshmedia.h: Intrinsics corresponding to SHmedia instructions that
+ may only be executed in privileged mode. */
+
+#ifndef _SSHMEDIA_H
+#define _SSHMEDIA_H
+
+#if __SHMEDIA__
+__inline__ static unsigned long long sh_media_GETCON (unsigned int k)
+ __attribute__((always_inline));
+
+__inline__ static
+unsigned long long
+sh_media_GETCON (unsigned int k)
+{
+ unsigned long long res;
+ __asm__ __volatile__ ("getcon cr%1, %0" : "=r" (res) : "n" (k));
+ return res;
+}
+
+__inline__ static void sh_media_PUTCON (unsigned long long mm, unsigned int k)
+ __attribute__((always_inline));
+
+__inline__ static
+void
+sh_media_PUTCON (unsigned long long mm, unsigned int k)
+{
+ __asm__ __volatile__ ("putcon %0, cr%1" : : "r" (mm), "n" (k));
+}
+
+__inline__ static
+unsigned long long
+sh_media_GETCFG (unsigned long long mm, int s)
+{
+ unsigned long long res;
+ __asm__ __volatile__ ("getcfg %1, %2, %0" : "=r" (res) : "r" (mm), "n" (s));
+ return res;
+}
+
+__inline__ static
+void
+sh_media_PUTCFG (unsigned long long mm, int s, unsigned long long mw)
+{
+ __asm__ __volatile__ ("putcfg %0, %1, %2" : : "r" (mm), "n" (s), "r" (mw));
+}
+
+__inline__ static
+void
+sh_media_SLEEP (void)
+{
+ __asm__ __volatile__ ("sleep");
+}
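+
+/* Usage sketch (illustrative only; CR_NUM stands for a control register
+   number chosen by the caller -- it must be a compile-time constant
+   because of the "n" asm constraints above):
+
+     unsigned long long v = sh_media_GETCON (CR_NUM);
+     sh_media_PUTCON (v | 1ULL, CR_NUM);  */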
+#endif
+
+#endif
diff --git a/gcc-4.9/gcc/config/sh/superh.h b/gcc-4.9/gcc/config/sh/superh.h
new file mode 100644
index 000000000..98bc197a8
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/superh.h
@@ -0,0 +1,104 @@
+/* Definitions of target machine for gcc for Super-H using sh-superh-elf.
+ Copyright (C) 2001-2014 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+/* This header file is used when the vendor name is set to 'superh'.
+ config.gcc already configured the compiler for SH4 only and switched
+ the default endianness to little (although big endian is still available).
+ This file configures the spec file to the default board configuration
+ but in such a way that it can be overridden by a boardspecs file
+ (using the -specs= option). That boardspecs file is expected to disable
+ the defaults and provide options --defsym _start and --defsym _stack
+ which are required by the SuperH configuration of GNU ld.
+
+ This file is intended to override sh.h. */
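+
+/* For example, a board-specific setup would typically be selected by
+   passing a specs file on the command line, roughly like this (the specs
+   file name is purely illustrative):
+
+     sh-superh-elf-gcc -specs=myboard.specs -o test test.c  */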
+
+#ifndef _SUPERH_H
+#define _SUPERH_H
+#endif
+
+
+/* Override the linker spec strings to use the new emulation
+ The specstrings are concatenated as follows
+ LINK_EMUL_PREFIX.(''|'32'|'64'|LINK_DEFAULT_CPU_EMUL).SUBTARGET_LINK_EMUL_SUFFIX
+*/
+#undef LINK_EMUL_PREFIX
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+
+#define LINK_EMUL_PREFIX "superh"
+#define SUBTARGET_LINK_EMUL_SUFFIX ""
+
+/* Add the SUBTARGET_LINK_SPEC to add the board and runtime support and
+ change the endianness */
+#undef SUBTARGET_LINK_SPEC
+#if TARGET_ENDIAN_DEFAULT == MASK_LITTLE_ENDIAN
+#define SUBTARGET_LINK_SPEC "%(board_link) %(ldruntime) %{ml|!mb:-EL}%{mb:-EB}"
+#else
+#define SUBTARGET_LINK_SPEC "%(board_link) %(ldruntime) %{ml:-EL}%{mb|!ml:-EB}"
+#endif
+
+
+/* This is used by the link spec if the boardspecs file is not used
+ (for whatever reason).
+ If the boardspecs file overrides this then an alternative can be used. */
+#undef SUBTARGET_EXTRA_SPECS
+#define SUBTARGET_EXTRA_SPECS \
+{ "board_link", "--defsym _start=0x1000 --defsym _stack=0x30000" }, \
+{ "asruntime", "" }, \
+{ "cppruntime", "-D__GDB_SIM__" }, \
+{ "cc1runtime", "" }, \
+{ "ldruntime", "" }, \
+{ "libruntime", "-lc -lgloss" }
+
+
+/* Set the SUBTARGET_CPP_SPEC to define __EMBEDDED_CROSS__ which has an effect
+ on newlib and provide the runtime support */
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC \
+"-D__EMBEDDED_CROSS__ %{m4-100*:-D__SH4_100__} %{m4-200*:-D__SH4_200__} %{m4-300*:-D__SH4_300__} %{m4-340:-D__SH4_340__} %{m4-400:-D__SH4_400__} %{m4-500:-D__SH4_500__} \
+%(cppruntime)"
+
+/* Override the SUBTARGET_ASM_SPEC to add the runtime support */
+#undef SUBTARGET_ASM_SPEC
+#define SUBTARGET_ASM_SPEC "%{m4-100*|m4-200*:-isa=sh4} %{m4-400|m4-340:-isa=sh4-nommu-nofpu} %{m4-500:-isa=sh4-nofpu} %(asruntime)"
+
+/* Override the SUBTARGET_ASM_RELAX_SPEC so it doesn't interfere with the
+ runtime support by adding -isa=sh4 in the wrong place. */
+#undef SUBTARGET_ASM_RELAX_SPEC
+#define SUBTARGET_ASM_RELAX_SPEC "%{!m4-100*:%{!m4-200*:%{!m4-300*:%{!m4-340:%{!m4-400:%{!m4-500:-isa=sh4}}}}}}"
+
+/* Create the CC1_SPEC to add the runtime support */
+#undef CC1_SPEC
+#define CC1_SPEC "%(cc1runtime)"
+
+#undef CC1PLUS_SPEC
+#define CC1PLUS_SPEC "%(cc1runtime)"
+
+
+/* Override the LIB_SPEC to add the runtime support */
+#undef LIB_SPEC
+#define LIB_SPEC "%{!shared:%{!symbolic:%(libruntime) -lc}} %{pg:-lprofile -lc}"
+
+/* Override STARTFILE_SPEC to add profiling and MMU support. */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+ "%{!shared: %{!m4-400*:%{!m4-340*: %{pg:gcrt1-mmu.o%s}%{!pg:crt1-mmu.o%s}}}} \
+ %{!shared: %{m4-340*|m4-400*: %{pg:gcrt1.o%s}%{!pg:crt1.o%s}}} \
+ crti.o%s \
+ %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
diff --git a/gcc-4.9/gcc/config/sh/superh.opt b/gcc-4.9/gcc/config/sh/superh.opt
new file mode 100644
index 000000000..b85abddaf
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/superh.opt
@@ -0,0 +1,10 @@
+;; The -mboard and -mruntime options need only be accepted here; they are
+;; actually processed by supplementary specs files.
+
+mboard=
+Target RejectNegative Joined
+Board name [and memory region].
+
+mruntime=
+Target RejectNegative Joined
+Runtime name.
diff --git a/gcc-4.9/gcc/config/sh/sync.md b/gcc-4.9/gcc/config/sh/sync.md
new file mode 100644
index 000000000..a0a22a1f5
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/sync.md
@@ -0,0 +1,1388 @@
+;; GCC machine description for SH synchronization instructions.
+;; Copyright (C) 2011-2014 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;;
+;; Atomic integer operations for the Renesas / SuperH SH CPUs.
+;;
+;; On SH CPUs atomic integer operations can be done either in 'software' or
+;; in 'hardware' in various styles. True hardware support was introduced
+;; with the SH4A. Some SH2A dual-core models (e.g. SH7205) also come with
+;; 'semaphore' hardware registers, but these are currently unsupported.
+;; All SH CPUs support the 'tas.b' instruction, which can be optionally used
+;; to implement the 'atomic_test_and_set' builtin.
+;; The following atomic options and models are supported.
+;;
+;; tas.b atomic_test_and_set (-mtas)
+;;
+;; Depending on the particular hardware configuration, usage of the 'tas.b'
+;; instruction might be undesired or even unsafe. Thus, it has to be
+;; enabled by the user explicitly. If it is not enabled, the
+;; 'atomic_test_and_set' builtin is implemented either with hardware or with
+;; software atomics, depending on which is enabled. It is also possible to
+;; enable the 'tas.b' instruction only, without enabling support for the
+;; other atomic operations.
+;;
+;;
+;; Hardware Atomics (-matomic-model=hard-llcs; SH4A only)
+;;
+;; Hardware atomics implement all atomic operations using the 'movli.l' and
+;; 'movco.l' instructions that are available on SH4A. On multi-core hardware
+;; configurations hardware atomics is the only safe mode.
+;; However, it can also be safely used on single-core configurations.
+;; Since these instructions operate on SImode memory only, QImode and HImode
+;; have to be emulated with SImode and subreg masking, which results in
+;; larger code.
+;;
+;;
+;; gUSA Software Atomics (-matomic-model=soft-gusa; SH3*, SH4* only)
+;;
+;; On single-core systems there can only be one execution context running
+;; at a given point in time. This allows the usage of rewindable atomic
+;; sequences, which effectively emulate locked-load / conditional-store
+;; operations. This requires complementary support in the interrupt /
+;; exception handling code (e.g. kernel) and does not work safely on multi-
+;; core configurations.
+;;
+;; When an execution context is interrupted while it is in an atomic
+;; sequence, the interrupted context's PC is rewound to the beginning of
+;; the atomic sequence by the interrupt / exception handling code, before
+;; transferring control to another execution context. This is done by
+;; something like...
+;;
+;; if (interrupted_context_in_atomic_sequence
+;; && interrupted_pc < atomic_exitpoint)
+;; interrupted_pc = atomic_entrypoint;
+;;
+;; This method is also known as gUSA ("g" User Space Atomicity) and the
+;; Linux kernel for SH3/SH4 implements support for such software atomic
+;; sequences. It can also be implemented in freestanding environments.
+;;
+;; For this the following atomic sequence ABI is used.
+;;
+;; r15 >= 0: Execution context is not in an atomic sequence.
+;;
+;; r15 < 0: Execution context is in an atomic sequence and r15
+;; holds the negative byte length of the atomic sequence.
+;; In this case the following applies:
+;;
+;; r0: PC of the first instruction after the atomic
+;; write-back instruction (exit point).
+;; The entry point PC of the atomic sequence can be
+;; determined by doing r0 + r15.
+;;
+;; r1: Saved r15 stack pointer before entering the
+;; atomic sequence.
+;;
+;; An example atomic add sequence would look like:
+;;
+;; mova .Lend,r0 ! .Lend must be 4-byte aligned.
+;; mov r15,r1
+;; .align 2 ! Insert aligning nop if needed.
+;; mov #(.Lstart - .Lend),r15 ! Enter atomic sequence
+;;.Lstart:
+;; mov.l @r4,r2 ! read value
+;; add r2,r5 ! modify value
+;; mov.l r5,@r4 ! write-back
+;;.Lend:
+;; mov r1,r15 ! Exit atomic sequence
+;; ! r2 holds the previous value.
+;; ! r5 holds the new value.
+;;
+;; Notice that due to the restrictions of the mova instruction, the .Lend
+;; label must always be 4-byte aligned. However, aligning the .Lend label
+;; itself could potentially insert a nop after the write-back instruction.
+;; The sequence could then be rewound although it has already passed the
+;; write-back instruction, which would make it execute twice.
+;; For correct operation the atomic sequences must not be rewound after
+;; they have passed the write-back instruction.
+;;
+;; This model works only on SH3* and SH4* because the stack pointer (r15)
+;; is set to an invalid pointer temporarily. SH1* and SH2* CPUs will try
+;; to push SR and PC registers on the stack when an interrupt / exception
+;; occurs, and thus require the stack pointer (r15) always to be valid.
+;;
+;;
+;; TCB Software Atomics (-matomic-model=soft-tcb)
+;;
+;; This model is a variation of the gUSA model. The concept of rewindable
+;; atomic sequences is the same, but it does not use the stack pointer (r15)
+;; for signaling the 'is in atomic sequence' condition. Instead, a variable
+;; in the thread control block (TCB) is set to hold the exit point of the
+;; atomic sequence. This assumes that the GBR is used as a thread pointer
+;; register. The offset of the variable in the TCB to be used must be
+;; specified with an additional option 'gbr-offset', such as:
+;; -matomic-model=soft-tcb,gbr-offset=4
+;;
+;; For this model the following atomic sequence ABI is used.
+;;
+;; @(#x,gbr) == 0: Execution context is not in an atomic sequence.
+;;
+;; @(#x,gbr) != 0: Execution context is in an atomic sequence. In this
+;; case the following applies:
+;;
+;; @(#x,gbr): PC of the first instruction after the atomic
+;; write-back instruction (exit point).
+;;
+;; r1: Negative byte length of the atomic sequence.
+;; The entry point PC of the sequence can be
+;; determined by doing @(#x,gbr) + r1
+;;
+;; Note: #x is the user specified gbr-offset.
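+;;
+;; An atomic add sequence in this model might look like the following
+;; (illustrative sketch only, analogous to the gUSA example above; the
+;; actual code comes from the *_soft_tcb insn patterns below):
+;;
+;;	mova	.Lend,r0		! .Lend must be 4-byte aligned.
+;;	mov	#(.Lstart - .Lend),r1
+;;	.align 2			! Insert aligning nop if needed.
+;;	mov.l	r0,@(#x,gbr)		! Enter atomic sequence
+;;.Lstart:
+;;	mov.l	@r4,r2			! read value
+;;	mov	#0,r0			! prepare 'not in sequence' marker
+;;	add	r2,r5			! modify value
+;;	mov.l	r5,@r4			! write-back
+;;.Lend:
+;;	mov.l	r0,@(#x,gbr)		! Exit atomic sequence
+;;					! r2 holds the previous value.
+;;					! r5 holds the new value.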
+;;
+;;
+;; Interrupt-Flipping Software Atomics (-matomic-model=soft-imask)
+;;
+;; This model achieves atomicity by temporarily disabling interrupts for
+;; the duration of the atomic sequence. This works only when the program
+;; runs in privileged mode but does not require any support from the
+;; interrupt / exception handling code. There is no particular ABI.
+;; To disable interrupts the SR.IMASK bits are set to '1111'.
+;; This method is not as efficient as the other software atomic models,
+;; since loading and storing SR (in order to flip interrupts on / off)
+;; requires using multi-cycle instructions. Moreover, it can potentially
+;; increase the interrupt latency which might be important for hard-realtime
+;; applications.
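+;;
+;; As an illustrative sketch (derived from the *_soft_imask insn patterns
+;; below), an atomic add in this model is essentially:
+;;
+;;	stc	sr,r0		! save SR
+;;	mov	r0,r3
+;;	or	#0xF0,r0
+;;	ldc	r0,sr		! set SR.IMASK to '1111'
+;;	mov.l	@r4,r0		! read value
+;;	add	r5,r0		! modify value
+;;	mov.l	r0,@r4		! write-back
+;;	ldc	r3,sr		! restore SR (re-enable interrupts)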
+;;
+;;
+;; Compatibility Notes
+;;
+;; On single-core SH4A CPUs software atomic aware interrupt / exception code
+;; is actually compatible with user code that utilizes hardware atomics.
+;; Since SImode hardware atomic sequences are more compact on SH4A they are
+;; always used, regardless of the selected atomic model. This atomic model
+;; mixing can be disabled by setting the 'strict' flag, like:
+;; -matomic-model=soft-gusa,strict
+;;
+;; The software atomic models are generally compatible with each other,
+;; but the interrupt / exception handling code has to support both gUSA and
+;; TCB models.
+;;
+;; The current atomic support is limited to QImode, HImode and SImode
+;; atomic operations. DImode operations could also be implemented but
+;; would require some ABI modifications to support multiple-instruction
+;; write-back. This is because SH1/SH2/SH3/SH4 does not have a DImode
+;; store instruction. DImode stores must be split into two SImode stores.
+
+(define_c_enum "unspec" [
+ UNSPEC_ATOMIC
+])
+
+(define_c_enum "unspecv" [
+ UNSPECV_CMPXCHG_1
+ UNSPECV_CMPXCHG_2
+ UNSPECV_CMPXCHG_3
+])
+
+(define_mode_attr i124extend_insn [(QI "exts.b") (HI "exts.w") (SI "mov")])
+
+(define_code_iterator FETCHOP [plus minus ior xor and])
+(define_code_attr fetchop_name
+ [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
+
+(define_code_attr fetchop_predicate
+ [(plus "atomic_arith_operand") (minus "register_operand")
+ (ior "atomic_logical_operand") (xor "atomic_logical_operand")
+ (and "atomic_logical_operand")])
+
+(define_code_attr fetchop_constraint
+ [(plus "rI08") (minus "r") (ior "rK08") (xor "rK08") (and "rK08")])
+
+;;------------------------------------------------------------------------------
+;; compare and swap
+
+(define_expand "atomic_compare_and_swap<mode>"
+ [(match_operand:SI 0 "register_operand" "") ;; bool success output
+ (match_operand:QIHISI 1 "register_operand" "") ;; oldval output
+ (match_operand:QIHISI 2 "memory_operand" "") ;; memory
+ (match_operand:QIHISI 3 "atomic_arith_operand" "") ;; expected input
+ (match_operand:QIHISI 4 "atomic_arith_operand" "") ;; newval input
+ (match_operand:SI 5 "const_int_operand" "") ;; is_weak
+ (match_operand:SI 6 "const_int_operand" "") ;; success model
+ (match_operand:SI 7 "const_int_operand" "")] ;; failure model
+ "TARGET_ATOMIC_ANY"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[2], 0));
+ rtx old_val = gen_lowpart (SImode, operands[1]);
+ rtx exp_val = operands[3];
+ rtx new_val = operands[4];
+ rtx atomic_insn;
+
+ if (TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && <MODE>mode == SImode && !TARGET_ATOMIC_STRICT))
+ atomic_insn = gen_atomic_compare_and_swap<mode>_hard (old_val, addr,
+ exp_val, new_val);
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ atomic_insn = gen_atomic_compare_and_swap<mode>_soft_gusa (old_val, addr,
+ exp_val, new_val);
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ atomic_insn = gen_atomic_compare_and_swap<mode>_soft_tcb (old_val, addr,
+ exp_val, new_val, TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX);
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ atomic_insn = gen_atomic_compare_and_swap<mode>_soft_imask (old_val, addr,
+ exp_val, new_val);
+ else
+ FAIL;
+
+ emit_insn (atomic_insn);
+
+ if (<MODE>mode == QImode)
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[1]),
+ operands[1]));
+ else if (<MODE>mode == HImode)
+ emit_insn (gen_zero_extendhisi2 (gen_lowpart (SImode, operands[1]),
+ operands[1]));
+ emit_insn (gen_movsi (operands[0], gen_rtx_REG (SImode, T_REG)));
+ DONE;
+})
+
+(define_insn "atomic_compare_and_swapsi_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec_volatile:SI
+ [(mem:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "arith_operand" "rI08")
+ (match_operand:SI 3 "arith_operand" "rI08")]
+ UNSPECV_CMPXCHG_1))
+ (set (mem:SI (match_dup 1))
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2))
+ (set (reg:SI T_REG)
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_3))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
+{
+ return "\r0: movli.l @%1,r0" "\n"
+ " cmp/eq %2,r0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " mov r0,%0" "\n"
+ " mov %3,r0" "\n"
+ " movco.l r0,@%1" "\n"
+ " bf 0b" "\n"
+ "0:";
+}
+ [(set_attr "length" "14")])
+
+(define_insn "atomic_compare_and_swap<mode>_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec_volatile:SI
+ [(mem:QIHI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHI 2 "register_operand" "r")
+ (match_operand:QIHI 3 "register_operand" "r")]
+ UNSPECV_CMPXCHG_1))
+ (set (mem:QIHI (match_dup 1))
+ (unspec_volatile:QIHI [(const_int 0)] UNSPECV_CMPXCHG_2))
+ (set (reg:SI T_REG)
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_3))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=&r"))
+ (clobber (match_scratch:SI 6 "=1"))]
+ "TARGET_ATOMIC_HARD_LLCS"
+{
+ return "\r mov #-4,%5" "\n"
+ " <i124extend_insn> %2,%4" "\n"
+ " and %1,%5" "\n"
+ " xor %5,%1" "\n"
+ " add r15,%1" "\n"
+ " add #-4,%1" "\n"
+ "0: movli.l @%5,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.<bw> @%1,%0" "\n"
+ " mov.<bw> %3,@%1" "\n"
+ " cmp/eq %4,%0" "\n"
+ " bf{.|/}s 0f" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%5" "\n"
+ " bf 0b" "\n"
+ "0:";
+}
+ [(set_attr "length" "30")])
+
+(define_insn "atomic_compare_and_swap<mode>_soft_gusa"
+ [(set (match_operand:SI 0 "register_operand" "=&u")
+ (unspec_volatile:SI
+ [(mem:QIHISI (match_operand:SI 1 "register_operand" "u"))
+ (match_operand:QIHISI 2 "register_operand" "u")
+ (match_operand:QIHISI 3 "register_operand" "u")]
+ UNSPECV_CMPXCHG_1))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec_volatile:QIHISI [(const_int 0)] UNSPECV_CMPXCHG_2))
+ (set (reg:SI T_REG)
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_3))
+ (clobber (match_scratch:SI 4 "=&u"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA"
+{
+ return "\r mova 1f,r0" "\n"
+ " <i124extend_insn> %2,%4" "\n"
+ " .align 2" "\n"
+ " mov r15,r1" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " cmp/eq %0,%4" "\n"
+ " bf 1f" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ "1: mov r1,r15";
+}
+ [(set_attr "length" "20")])
+
+(define_insn "atomic_compare_and_swap<mode>_soft_tcb"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (unspec_volatile:SI
+ [(mem:QIHISI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHISI 2 "register_operand" "r")
+ (match_operand:QIHISI 3 "register_operand" "r")]
+ UNSPECV_CMPXCHG_1))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec_volatile:QIHISI [(const_int 0)] UNSPECV_CMPXCHG_2))
+ (set (reg:SI T_REG)
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_3))
+ (use (match_operand:SI 4 "gbr_displacement"))
+ (clobber (match_scratch:SI 5 "=&r"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_TCB"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " <i124extend_insn> %2,%5" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " mov.l r0,@(%O4,gbr)" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov #0,r0" "\n"
+ " cmp/eq %0,%5" "\n"
+ " bf 1f" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ "1: mov.l r0,@(%O4,gbr)";
+}
+ [(set_attr "length" "22")])
+
+(define_insn "atomic_compare_and_swap<mode>_soft_imask"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (unspec_volatile:SI
+ [(mem:QIHISI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHISI 2 "register_operand" "r")
+ (match_operand:QIHISI 3 "register_operand" "r")]
+ UNSPECV_CMPXCHG_1))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec_volatile:QIHISI [(const_int 0)] UNSPECV_CMPXCHG_2))
+ (set (reg:SI T_REG)
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_3))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (match_scratch:SI 5 "=&r"))]
+ "TARGET_ATOMIC_SOFT_IMASK"
+{
+ /* The comparison result is supposed to be in T_REG.
+ Notice that restoring SR will overwrite the T_REG. We handle this by
+ rotating the T_REG into the saved SR before restoring SR. On SH2A we
+ can do one insn shorter by using the bst insn. */
+ if (!TARGET_SH2A)
+ return "\r stc sr,%0" "\n"
+ " <i124extend_insn> %2,%4" "\n"
+ " mov %0,%5" "\n"
+ " or #0xF0,%0" "\n"
+ " shlr %5" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " cmp/eq %4,%0" "\n"
+ " bf 1f" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ "1: rotcl %5" "\n"
+ " ldc %5,sr";
+ else
+ return "\r stc sr,%0" "\n"
+ " <i124extend_insn> %2,%4" "\n"
+ " mov %0,%5" "\n"
+ " or #0xF0,%0" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " cmp/eq %4,%0" "\n"
+ " bst #0,%5" "\n"
+ " bf 1f" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ "1: ldc %5,sr";
+}
+ [(set (attr "length") (if_then_else (match_test "!TARGET_SH2A")
+ (const_string "24")
+ (const_string "22")))])
+
+;;------------------------------------------------------------------------------
+;; read - write - return old value
+
+(define_expand "atomic_exchange<mode>"
+ [(match_operand:QIHISI 0 "register_operand" "") ;; oldval output
+ (match_operand:QIHISI 1 "memory_operand" "") ;; memory
+ (match_operand:QIHISI 2 "atomic_arith_operand" "") ;; newval input
+ (match_operand:SI 3 "const_int_operand" "")] ;; memory model
+ "TARGET_ATOMIC_ANY"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ rtx val = operands[2];
+ rtx atomic_insn;
+
+ if (TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && <MODE>mode == SImode && !TARGET_ATOMIC_STRICT))
+ atomic_insn = gen_atomic_exchange<mode>_hard (operands[0], addr, val);
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ atomic_insn = gen_atomic_exchange<mode>_soft_gusa (operands[0], addr, val);
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ atomic_insn = gen_atomic_exchange<mode>_soft_tcb (operands[0], addr, val,
+ TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX);
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ atomic_insn = gen_atomic_exchange<mode>_soft_imask (operands[0], addr, val);
+ else
+ FAIL;
+
+ emit_insn (atomic_insn);
+
+ if (<MODE>mode == QImode)
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ else if (<MODE>mode == HImode)
+ emit_insn (gen_zero_extendhisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ DONE;
+})
+
+(define_insn "atomic_exchangesi_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mem:SI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(match_operand:SI 2 "arith_operand" "rI08")] UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
+{
+ return "\r0: movli.l @%1,r0" "\n"
+ " mov r0,%0" "\n"
+ " mov %2,r0" "\n"
+ " movco.l r0,@%1" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "10")])
+
+(define_insn "atomic_exchange<mode>_hard"
+ [(set (match_operand:QIHI 0 "register_operand" "=&r")
+ (mem:QIHI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHI (match_dup 1))
+ (unspec:QIHI
+ [(match_operand:QIHI 2 "register_operand" "r")] UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=1"))]
+ "TARGET_ATOMIC_HARD_LLCS"
+{
+ return "\r mov #-4,%3" "\n"
+ " and %1,%3" "\n"
+ " xor %3,%1" "\n"
+ " add r15,%1" "\n"
+ " add #-4,%1" "\n"
+ "0: movli.l @%3,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.<bw> @%1,%0" "\n"
+ " mov.<bw> %2,@%1" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%3" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "24")])
+
+(define_insn "atomic_exchange<mode>_soft_gusa"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&u")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "u")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(match_operand:QIHISI 2 "register_operand" "u")] UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " mov r15,r1" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov.<bwl> %2,@%1" "\n"
+ "1: mov r1,r15";
+}
+ [(set_attr "length" "14")])
+
+(define_insn "atomic_exchange<mode>_soft_tcb"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&r")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(match_operand:QIHISI 2 "register_operand" "r")] UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 3 "gbr_displacement"))]
+ "TARGET_ATOMIC_SOFT_TCB"
+{
+ return "\r mova 1f,r0" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " .align 2" "\n"
+ " mov.l r0,@(%O3,gbr)" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov #0,r0" "\n"
+ " mov.<bwl> %2,@%1" "\n"
+ "1: mov.l r0,@(%O3,gbr)";
+}
+ [(set_attr "length" "16")])
+
+(define_insn "atomic_exchange<mode>_soft_imask"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&z")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(match_operand:QIHISI 2 "register_operand" "r")] UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_ATOMIC_SOFT_IMASK"
+{
+ return "\r stc sr,%0" "\n"
+ " mov %0,%3" "\n"
+ " or #0xF0,%0" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " mov.<bwl> %2,@%1" "\n"
+ " ldc %3,sr";
+}
+ [(set_attr "length" "14")])
+
+;;------------------------------------------------------------------------------
+;; read - add|sub|or|and|xor|nand - write - return old value
+
+(define_expand "atomic_fetch_<fetchop_name><mode>"
+ [(set (match_operand:QIHISI 0 "register_operand" "")
+ (match_operand:QIHISI 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (match_dup 1)
+ (match_operand:QIHISI 2 "<fetchop_predicate>" ""))]
+ UNSPEC_ATOMIC))
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_ATOMIC_ANY"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ rtx atomic_insn;
+
+ if (TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && <MODE>mode == SImode && !TARGET_ATOMIC_STRICT))
+ atomic_insn = gen_atomic_fetch_<fetchop_name><mode>_hard (operands[0], addr,
+ operands[2]);
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ atomic_insn = gen_atomic_fetch_<fetchop_name><mode>_soft_gusa (operands[0],
+ addr, operands[2]);
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ atomic_insn = gen_atomic_fetch_<fetchop_name><mode>_soft_tcb (operands[0],
+ addr, operands[2], TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX);
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ atomic_insn = gen_atomic_fetch_<fetchop_name><mode>_soft_imask (operands[0],
+ addr, operands[2]);
+ else
+ FAIL;
+
+ emit_insn (atomic_insn);
+
+ if (<MODE>mode == QImode)
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ else if (<MODE>mode == HImode)
+ emit_insn (gen_zero_extendhisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ DONE;
+})
+
+(define_insn "atomic_fetch_<fetchop_name>si_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mem:SI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(FETCHOP:SI (mem:SI (match_dup 1))
+ (match_operand:SI 2 "<fetchop_predicate>" "<fetchop_constraint>"))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
+{
+ return "\r0: movli.l @%1,r0" "\n"
+ " mov r0,%0" "\n"
+ " <fetchop_name> %2,r0" "\n"
+ " movco.l r0,@%1" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "10")])
+
+(define_insn "atomic_fetch_<fetchop_name><mode>_hard"
+ [(set (match_operand:QIHI 0 "register_operand" "=&r")
+ (mem:QIHI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHI (match_dup 1))
+ (unspec:QIHI
+ [(FETCHOP:QIHI (mem:QIHI (match_dup 1))
+ (match_operand:QIHI 2 "<fetchop_predicate>" "<fetchop_constraint>"))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=1"))]
+ "TARGET_ATOMIC_HARD_LLCS"
+{
+ return "\r mov #-4,%3" "\n"
+ " and %1,%3" "\n"
+ " xor %3,%1" "\n"
+ " add r15,%1" "\n"
+ " add #-4,%1" "\n"
+ "0: movli.l @%3,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.<bw> @%1,r0" "\n"
+ " mov r0,%0" "\n"
+ " <fetchop_name> %2,r0" "\n"
+ " mov.<bw> r0,@%1" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%3" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "28")])
+
+(define_insn "atomic_fetch_<fetchop_name><mode>_soft_gusa"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&u")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "u")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (mem:QIHISI (match_dup 1))
+ (match_operand:QIHISI 2 "register_operand" "u"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:QIHISI 3 "=&u"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " mov r15,r1" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov %0,%3" "\n"
+ " <fetchop_name> %2,%3" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ "1: mov r1,r15";
+}
+ [(set_attr "length" "18")])
+
+(define_insn "atomic_fetch_<fetchop_name><mode>_soft_tcb"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&r")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (mem:QIHISI (match_dup 1))
+ (match_operand:QIHISI 2 "register_operand" "r"))]
+ UNSPEC_ATOMIC))
+ (use (match_operand:SI 3 "gbr_displacement"))
+ (clobber (match_scratch:QIHISI 4 "=&r"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_TCB"
+{
+ return "\r mova 1f,r0" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " .align 2" "\n"
+ " mov.l r0,@(%O3,gbr)" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov #0,r0" "\n"
+ " mov %0,%4" "\n"
+ " <fetchop_name> %2,%4" "\n"
+ " mov.<bwl> %4,@%1" "\n"
+ "1: mov.l r0,@(%O3,gbr)";
+}
+ [(set_attr "length" "20")])
+
+(define_insn "atomic_fetch_<fetchop_name><mode>_soft_imask"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&z")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (mem:QIHISI (match_dup 1))
+ (match_operand:QIHISI 2 "register_operand" "r"))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:QIHISI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=&r"))]
+ "TARGET_ATOMIC_SOFT_IMASK"
+{
+ return "\r stc sr,%0" "\n"
+ " mov %0,%4" "\n"
+ " or #0xF0,%0" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " mov %0,%3" "\n"
+ " <fetchop_name> %2,%3" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ " ldc %4,sr";
+}
+ [(set_attr "length" "18")])
+
+(define_expand "atomic_fetch_nand<mode>"
+ [(set (match_operand:QIHISI 0 "register_operand" "")
+ (match_operand:QIHISI 1 "memory_operand" ""))
+ (set (match_dup 1)
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (match_dup 1)
+ (match_operand:QIHISI 2 "atomic_logical_operand" "")))]
+ UNSPEC_ATOMIC))
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_ATOMIC_ANY"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ rtx atomic_insn;
+
+ if (TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && <MODE>mode == SImode && !TARGET_ATOMIC_STRICT))
+ atomic_insn = gen_atomic_fetch_nand<mode>_hard (operands[0], addr,
+ operands[2]);
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ atomic_insn = gen_atomic_fetch_nand<mode>_soft_gusa (operands[0], addr,
+ operands[2]);
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ atomic_insn = gen_atomic_fetch_nand<mode>_soft_tcb (operands[0], addr,
+ operands[2], TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX);
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ atomic_insn = gen_atomic_fetch_nand<mode>_soft_imask (operands[0], addr,
+ operands[2]);
+ else
+ FAIL;
+
+ emit_insn (atomic_insn);
+
+ if (<MODE>mode == QImode)
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ else if (<MODE>mode == HImode)
+ emit_insn (gen_zero_extendhisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ DONE;
+})
+
+(define_insn "atomic_fetch_nandsi_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&r")
+ (mem:SI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(not:SI (and:SI (mem:SI (match_dup 1))
+ (match_operand:SI 2 "logical_operand" "rK08")))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
+{
+ return "\r0: movli.l @%1,r0" "\n"
+ " mov r0,%0" "\n"
+ " and %2,r0" "\n"
+ " not r0,r0" "\n"
+ " movco.l r0,@%1" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "12")])
+
+(define_insn "atomic_fetch_nand<mode>_hard"
+ [(set (match_operand:QIHI 0 "register_operand" "=&r")
+ (mem:QIHI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHI (match_dup 1))
+ (unspec:QIHI
+ [(not:QIHI (and:QIHI (mem:QIHI (match_dup 1))
+ (match_operand:QIHI 2 "logical_operand" "rK08")))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=1"))]
+ "TARGET_ATOMIC_HARD_LLCS"
+{
+ return "\r mov #-4,%3" "\n"
+ " and %1,%3" "\n"
+ " xor %3,%1" "\n"
+ " add r15,%1" "\n"
+ " add #-4,%1" "\n"
+ "0: movli.l @%3,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.<bw> @%1,r0" "\n"
+ " mov r0,%0" "\n"
+ " and %2,r0" "\n"
+ " not r0,r0" "\n"
+ " mov.<bw> r0,@%1" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%3" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "30")])
+
+(define_insn "atomic_fetch_nand<mode>_soft_gusa"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&u")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "u")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (mem:QIHISI (match_dup 1))
+ (match_operand:QIHISI 2 "register_operand" "u")))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:QIHISI 3 "=&u"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA"
+{
+ return "\r mova 1f,r0" "\n"
+ " mov r15,r1" "\n"
+ " .align 2" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov %2,%3" "\n"
+ " and %0,%3" "\n"
+ " not %3,%3" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ "1: mov r1,r15";
+}
+ [(set_attr "length" "20")])
+
+(define_insn "atomic_fetch_nand<mode>_soft_tcb"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&r")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (mem:QIHISI (match_dup 1))
+ (match_operand:QIHISI 2 "register_operand" "r")))]
+ UNSPEC_ATOMIC))
+ (use (match_operand:SI 3 "gbr_displacement"))
+ (clobber (match_scratch:QIHISI 4 "=&r"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_TCB"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " mov.l r0,@(%O3,gbr)" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov #0,r0" "\n"
+ " mov %2,%4" "\n"
+ " and %0,%4" "\n"
+ " not %4,%4" "\n"
+ " mov.<bwl> %4,@%1" "\n"
+ "1: mov.l r0,@(%O3,gbr)";
+}
+ [(set_attr "length" "22")])
+
+(define_insn "atomic_fetch_nand<mode>_soft_imask"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&z")
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (mem:QIHISI (match_dup 1))
+ (match_operand:QIHISI 2 "register_operand" "r")))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:QIHISI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=&r"))]
+ "TARGET_ATOMIC_SOFT_IMASK"
+{
+ return "\r stc sr,%0" "\n"
+ " mov %0,%4" "\n"
+ " or #0xF0,%0" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " mov %2,%3" "\n"
+ " and %0,%3" "\n"
+ " not %3,%3" "\n"
+ " mov.<bwl> %3,@%1" "\n"
+ " stc %4,sr";
+}
+ [(set_attr "length" "20")])
+
+;;------------------------------------------------------------------------------
+;; read - add|sub|or|and|xor|nand - write - return new value
+
+(define_expand "atomic_<fetchop_name>_fetch<mode>"
+ [(set (match_operand:QIHISI 0 "register_operand" "")
+ (FETCHOP:QIHISI
+ (match_operand:QIHISI 1 "memory_operand" "")
+ (match_operand:QIHISI 2 "<fetchop_predicate>" "")))
+ (set (match_dup 1)
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (match_dup 1) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_ATOMIC_ANY"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ rtx atomic_insn;
+
+ if (TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && <MODE>mode == SImode && !TARGET_ATOMIC_STRICT))
+ atomic_insn = gen_atomic_<fetchop_name>_fetch<mode>_hard (operands[0], addr,
+ operands[2]);
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ atomic_insn = gen_atomic_<fetchop_name>_fetch<mode>_soft_gusa (operands[0],
+ addr, operands[2]);
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ atomic_insn = gen_atomic_<fetchop_name>_fetch<mode>_soft_tcb (operands[0],
+ addr, operands[2], TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX);
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ atomic_insn = gen_atomic_<fetchop_name>_fetch<mode>_soft_imask (operands[0],
+ addr, operands[2]);
+ else
+ FAIL;
+
+ emit_insn (atomic_insn);
+
+ if (<MODE>mode == QImode)
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ else if (<MODE>mode == HImode)
+ emit_insn (gen_zero_extendhisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ DONE;
+})
+
+(define_insn "atomic_<fetchop_name>_fetchsi_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (FETCHOP:SI
+ (mem:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "<fetchop_predicate>" "<fetchop_constraint>")))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(FETCHOP:SI (mem:SI (match_dup 1)) (match_dup 2))]
+ UNSPEC_ATOMIC))]
+ "TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
+{
+ return "\r0: movli.l @%1,%0" "\n"
+ " <fetchop_name> %2,%0" "\n"
+ " movco.l %0,@%1" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "8")])
+
+(define_insn "atomic_<fetchop_name>_fetch<mode>_hard"
+ [(set (match_operand:QIHI 0 "register_operand" "=&r")
+ (FETCHOP:QIHI
+ (mem:QIHI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHI 2 "<fetchop_predicate>" "<fetchop_constraint>")))
+ (set (mem:QIHI (match_dup 1))
+ (unspec:QIHI
+ [(FETCHOP:QIHI (mem:QIHI (match_dup 1)) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=1"))]
+ "TARGET_ATOMIC_HARD_LLCS"
+{
+ return "\r mov #-4,%3" "\n"
+ " and %1,%3" "\n"
+ " xor %3,%1" "\n"
+ " add r15,%1" "\n"
+ " add #-4,%1" "\n"
+ "0: movli.l @%3,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.<bw> @%1,r0" "\n"
+ " <fetchop_name> %2,r0" "\n"
+ " mov.<bw> r0,@%1" "\n"
+ " mov r0,%0" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%3" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "28")])
+
+(define_insn "atomic_<fetchop_name>_fetch<mode>_soft_gusa"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&u")
+ (FETCHOP:QIHISI
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "u"))
+ (match_operand:QIHISI 2 "register_operand" "u")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (mem:QIHISI (match_dup 1)) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA"
+{
+ return "\r mova 1f,r0" "\n"
+ " mov r15,r1" "\n"
+ " .align 2" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " <fetchop_name> %2,%0" "\n"
+ " mov.<bwl> %0,@%1" "\n"
+ "1: mov r1,r15";
+}
+ [(set_attr "length" "16")])
+
+(define_insn "atomic_<fetchop_name>_fetch<mode>_soft_tcb"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&r")
+ (FETCHOP:QIHISI
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHISI 2 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (mem:QIHISI (match_dup 1)) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 3 "gbr_displacement"))]
+ "TARGET_ATOMIC_SOFT_TCB"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " mov.l r0,@(%O3,gbr)" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov #0,r0" "\n"
+ " <fetchop_name> %2,%0" "\n"
+ " mov.<bwl> %0,@%1" "\n"
+ "1: mov.l r0,@(%O3,gbr)";
+}
+ [(set_attr "length" "18")])
+
+(define_insn "atomic_<fetchop_name>_fetch<mode>_soft_imask"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&z")
+ (FETCHOP:QIHISI
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHISI 2 "register_operand" "r")))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(FETCHOP:QIHISI (mem:QIHISI (match_dup 1)) (match_dup 2))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_ATOMIC_SOFT_IMASK"
+{
+ return "\r stc sr,%0" "\n"
+ " mov %0,%3" "\n"
+ " or #0xF0,%0" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " <fetchop_name> %2,%0" "\n"
+ " mov.<bwl> %0,@%1" "\n"
+ " ldc %3,sr";
+}
+ [(set_attr "length" "16")])
+
+(define_expand "atomic_nand_fetch<mode>"
+ [(set (match_operand:QIHISI 0 "register_operand" "")
+ (not:QIHISI (and:QIHISI
+ (match_operand:QIHISI 1 "memory_operand" "")
+ (match_operand:QIHISI 2 "atomic_logical_operand" ""))))
+ (set (match_dup 1)
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (match_dup 1) (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_ATOMIC_ANY"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ rtx atomic_insn;
+
+ if (TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && <MODE>mode == SImode && !TARGET_ATOMIC_STRICT))
+ atomic_insn = gen_atomic_nand_fetch<mode>_hard (operands[0], addr,
+ operands[2]);
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ atomic_insn = gen_atomic_nand_fetch<mode>_soft_gusa (operands[0], addr,
+ operands[2]);
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ atomic_insn = gen_atomic_nand_fetch<mode>_soft_tcb (operands[0], addr,
+ operands[2], TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX);
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ atomic_insn = gen_atomic_nand_fetch<mode>_soft_imask (operands[0], addr,
+ operands[2]);
+ else
+ FAIL;
+
+ emit_insn (atomic_insn);
+
+ if (<MODE>mode == QImode)
+ emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ else if (<MODE>mode == HImode)
+ emit_insn (gen_zero_extendhisi2 (gen_lowpart (SImode, operands[0]),
+ operands[0]));
+ DONE;
+})
+
+(define_insn "atomic_nand_fetchsi_hard"
+ [(set (match_operand:SI 0 "register_operand" "=&z")
+ (not:SI (and:SI (mem:SI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:SI 2 "logical_operand" "rK08"))))
+ (set (mem:SI (match_dup 1))
+ (unspec:SI
+ [(not:SI (and:SI (mem:SI (match_dup 1)) (match_dup 2)))]
+ UNSPEC_ATOMIC))]
+ "TARGET_ATOMIC_HARD_LLCS
+ || (TARGET_SH4A_ARCH && TARGET_ATOMIC_ANY && !TARGET_ATOMIC_STRICT)"
+{
+ return "\r0: movli.l @%1,%0" "\n"
+ " and %2,%0" "\n"
+ " not %0,%0" "\n"
+ " movco.l %0,@%1" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "10")])
+
+(define_insn "atomic_nand_fetch<mode>_hard"
+ [(set (match_operand:QIHI 0 "register_operand" "=&r")
+ (not:QIHI
+ (and:QIHI (mem:QIHI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHI 2 "logical_operand" "rK08"))))
+ (set (mem:QIHI (match_dup 1))
+ (unspec:QIHI
+ [(not:QIHI (and:QIHI (mem:QIHI (match_dup 1)) (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=1"))]
+ "TARGET_ATOMIC_HARD_LLCS"
+{
+ return "\r mov #-4,%3" "\n"
+ " and %1,%3" "\n"
+ " xor %3,%1" "\n"
+ " add r15,%1" "\n"
+ " add #-4,%1" "\n"
+ "0: movli.l @%3,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.<bw> @%1,r0" "\n"
+ " and %2,r0" "\n"
+ " not r0,%0" "\n"
+ " mov.<bw> %0,@%1" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%3" "\n"
+ " bf 0b";
+}
+ [(set_attr "length" "28")])
+
+(define_insn "atomic_nand_fetch<mode>_soft_gusa"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&u")
+ (not:QIHISI (and:QIHISI
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "u"))
+ (match_operand:QIHISI 2 "register_operand" "u"))))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (mem:QIHISI (match_dup 1)) (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " mov r15,r1" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " and %2,%0" "\n"
+ " not %0,%0" "\n"
+ " mov.<bwl> %0,@%1" "\n"
+ "1: mov r1,r15";
+}
+ [(set_attr "length" "18")])
+
+(define_insn "atomic_nand_fetch<mode>_soft_tcb"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&r")
+ (not:QIHISI (and:QIHISI
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHISI 2 "register_operand" "r"))))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (mem:QIHISI (match_dup 1)) (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))
+ (use (match_operand:SI 3 "gbr_displacement"))]
+ "TARGET_ATOMIC_SOFT_TCB"
+{
+ return "\r mova 1f,r0" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " .align 2" "\n"
+ " mov.l r0,@(%O3,gbr)" "\n"
+ "0: mov.<bwl> @%1,%0" "\n"
+ " mov #0,r0" "\n"
+ " and %2,%0" "\n"
+ " not %0,%0" "\n"
+ " mov.<bwl> %0,@%1" "\n"
+ "1: mov.l r0,@(%O3,gbr)";
+}
+ [(set_attr "length" "20")])
+
+(define_insn "atomic_nand_fetch<mode>_soft_imask"
+ [(set (match_operand:QIHISI 0 "register_operand" "=&z")
+ (not:QIHISI (and:QIHISI
+ (mem:QIHISI (match_operand:SI 1 "register_operand" "r"))
+ (match_operand:QIHISI 2 "register_operand" "r"))))
+ (set (mem:QIHISI (match_dup 1))
+ (unspec:QIHISI
+ [(not:QIHISI (and:QIHISI (mem:QIHISI (match_dup 1)) (match_dup 2)))]
+ UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 3 "=&r"))]
+ "TARGET_ATOMIC_SOFT_IMASK"
+{
+ return "\r stc sr,%0" "\n"
+ " mov %0,%3" "\n"
+ " or #0xF0,%0" "\n"
+ " ldc %0,sr" "\n"
+ " mov.<bwl> @%1,%0" "\n"
+ " and %2,%0" "\n"
+ " not %0,%0" "\n"
+ " mov.<bwl> %0,@%1" "\n"
+ " ldc %3,sr";
+}
+ [(set_attr "length" "18")])
+
+;;------------------------------------------------------------------------------
+;; read - test against zero - or with 0x80 - write - return test result
+
+(define_expand "atomic_test_and_set"
+ [(match_operand:SI 0 "register_operand" "") ;; bool result output
+ (match_operand:QI 1 "memory_operand" "") ;; memory
+ (match_operand:SI 2 "const_int_operand" "")] ;; model
+ "(TARGET_ATOMIC_ANY || TARGET_ENABLE_TAS) && !TARGET_SHMEDIA"
+{
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+
+ if (TARGET_ENABLE_TAS)
+ emit_insn (gen_tasb (addr));
+ else
+ {
+ rtx val = gen_int_mode (targetm.atomic_test_and_set_trueval, QImode);
+ val = force_reg (QImode, val);
+
+ if (TARGET_ATOMIC_HARD_LLCS)
+ emit_insn (gen_atomic_test_and_set_hard (addr, val));
+ else if (TARGET_ATOMIC_SOFT_GUSA)
+ emit_insn (gen_atomic_test_and_set_soft_gusa (addr, val));
+ else if (TARGET_ATOMIC_SOFT_TCB)
+ emit_insn (gen_atomic_test_and_set_soft_tcb (addr, val,
+ TARGET_ATOMIC_SOFT_TCB_GBR_OFFSET_RTX));
+ else if (TARGET_ATOMIC_SOFT_IMASK)
+ emit_insn (gen_atomic_test_and_set_soft_imask (addr, val));
+ else
+ FAIL;
+ }
+
+ /* The result of the test op is the inverse of what we are
+ supposed to return. Thus invert the T bit. The inversion will be
+ potentially optimized away and integrated into surrounding code. */
+ emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
+ DONE;
+})
+
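As a rough illustration, a caller that ends up in this expander: atomic_flag_test_and_set from <stdatomic.h> (equivalently the __atomic_test_and_set built-in) is lowered to atomic_test_and_set, and the final movnegt supplies the inverted T bit described in the comment above. Whether tas.b or one of the software sequences is emitted depends on TARGET_ENABLE_TAS and the selected atomic model. Names below are illustrative only.

    #include <stdatomic.h>

    static atomic_flag lock = ATOMIC_FLAG_INIT;   /* illustrative variable */

    void
    spin_lock (void)
    {
      /* Spins while the flag was already set when sampled.  */
      while (atomic_flag_test_and_set (&lock))
        ;
    }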
+(define_insn "tasb"
+ [(set (reg:SI T_REG)
+ (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "r"))
+ (const_int 0)))
+ (set (mem:QI (match_dup 0))
+ (unspec:QI [(const_int 128)] UNSPEC_ATOMIC))]
+ "TARGET_ENABLE_TAS && !TARGET_SHMEDIA"
+ "tas.b @%0"
+ [(set_attr "insn_class" "co_group")])
+
+(define_insn "atomic_test_and_set_soft_gusa"
+ [(set (reg:SI T_REG)
+ (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "u"))
+ (const_int 0)))
+ (set (mem:QI (match_dup 0))
+ (unspec:QI [(match_operand:QI 1 "register_operand" "u")] UNSPEC_ATOMIC))
+ (clobber (match_scratch:QI 2 "=&u"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_GUSA && !TARGET_ENABLE_TAS"
+{
+ return "\r mova 1f,r0" "\n"
+ " .align 2" "\n"
+ " mov r15,r1" "\n"
+ " mov #(0f-1f),r15" "\n"
+ "0: mov.b @%0,%2" "\n"
+ " mov.b %1,@%0" "\n"
+ "1: mov r1,r15" "\n"
+ " tst %2,%2";
+}
+ [(set_attr "length" "16")])
+
+(define_insn "atomic_test_and_set_soft_tcb"
+ [(set (reg:SI T_REG)
+ (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "r"))
+ (const_int 0)))
+ (set (mem:QI (match_dup 0))
+ (unspec:QI [(match_operand:QI 1 "register_operand" "r")] UNSPEC_ATOMIC))
+ (use (match_operand:SI 2 "gbr_displacement"))
+ (clobber (match_scratch:QI 3 "=&r"))
+ (clobber (reg:SI R0_REG))
+ (clobber (reg:SI R1_REG))]
+ "TARGET_ATOMIC_SOFT_TCB && !TARGET_ENABLE_TAS"
+{
+ return "\r mova 1f,r0" "\n"
+ " mov #(0f-1f),r1" "\n"
+ " .align 2" "\n"
+ " mov.l r0,@(%O2,gbr)" "\n"
+ "0: mov.b @%0,%3" "\n"
+ " mov #0,r0" "\n"
+ " mov.b %1,@%0" "\n"
+ "1: mov.l r0,@(%O2,gbr)" "\n"
+ " tst %3,%3";
+}
+ [(set_attr "length" "18")])
+
+(define_insn "atomic_test_and_set_soft_imask"
+ [(set (reg:SI T_REG)
+ (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "r"))
+ (const_int 0)))
+ (set (mem:QI (match_dup 0))
+ (unspec:QI [(match_operand:QI 1 "register_operand" "r")] UNSPEC_ATOMIC))
+ (clobber (match_scratch:SI 2 "=&r"))
+ (clobber (reg:SI R0_REG))]
+ "TARGET_ATOMIC_SOFT_IMASK && !TARGET_ENABLE_TAS"
+{
+ return "\r stc sr,r0" "\n"
+ " mov r0,%2" "\n"
+ " or #0xF0,r0" "\n"
+ " ldc r0,sr" "\n"
+ " mov.b @%0,r0" "\n"
+ " mov.b %1,@%0" "\n"
+ " stc %2,sr" "\n"
+ " tst r0,r0";
+}
+ [(set_attr "length" "16")])
+
+(define_insn "atomic_test_and_set_hard"
+ [(set (reg:SI T_REG)
+ (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "r"))
+ (const_int 0)))
+ (set (mem:QI (match_dup 0))
+ (unspec:QI [(match_operand:QI 1 "register_operand" "r")] UNSPEC_ATOMIC))
+ (clobber (reg:SI R0_REG))
+ (clobber (match_scratch:SI 2 "=&r"))
+ (clobber (match_scratch:SI 3 "=&r"))
+ (clobber (match_scratch:SI 4 "=0"))]
+ "TARGET_ATOMIC_HARD_LLCS && !TARGET_ENABLE_TAS"
+{
+ return "\r mov #-4,%2" "\n"
+ " and %0,%2" "\n"
+ " xor %2,%0" "\n"
+ " add r15,%0" "\n"
+ " add #-4,%0" "\n"
+ "0: movli.l @%2,r0" "\n"
+ " mov.l r0,@-r15" "\n"
+ " mov.b @%0,%3" "\n"
+ " mov.b %1,@%0" "\n"
+ " mov.l @r15+,r0" "\n"
+ " movco.l r0,@%2" "\n"
+ " bf 0b" "\n"
+ " tst %3,%3";
+}
+ [(set_attr "length" "26")])
+
diff --git a/gcc-4.9/gcc/config/sh/t-linux b/gcc-4.9/gcc/config/sh/t-linux
new file mode 100644
index 000000000..d33c63839
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/t-linux
@@ -0,0 +1,2 @@
+MULTILIB_DIRNAMES=
+MULTILIB_MATCHES =
diff --git a/gcc-4.9/gcc/config/sh/t-netbsd-sh5-64 b/gcc-4.9/gcc/config/sh/t-netbsd-sh5-64
new file mode 100644
index 000000000..8fc6bd1ea
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/t-netbsd-sh5-64
@@ -0,0 +1 @@
+MULTILIB_DIRNAMES= $(MULTILIB_RAW_DIRNAMES:/media64=)
diff --git a/gcc-4.9/gcc/config/sh/t-rtems b/gcc-4.9/gcc/config/sh/t-rtems
new file mode 100644
index 000000000..9fd262cf8
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/t-rtems
@@ -0,0 +1,7 @@
+# Custom multilibs for RTEMS
+
+MULTILIB_ENDIAN = ml
+MULTILIB_OPTIONS= $(MULTILIB_ENDIAN) m2/m2e/m4-single-only/m4-single/m4
+MULTILIB_DIRNAMES=
+MULTILIB_MATCHES = m2=m3 m2e=m3e m2=m4-nofpu
+MULTILIB_EXCEPTIONS = ml
diff --git a/gcc-4.9/gcc/config/sh/t-sh b/gcc-4.9/gcc/config/sh/t-sh
new file mode 100644
index 000000000..d9f2b3d93
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/t-sh
@@ -0,0 +1,101 @@
+# Copyright (C) 1993-2014 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+sh-mem.o: $(srcdir)/config/sh/sh-mem.cc \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+sh-c.o: $(srcdir)/config/sh/sh-c.c \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/sh/sh-c.c
+
+sh_treg_combine.o: $(srcdir)/config/sh/sh_treg_combine.cc \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+sh_optimize_sett_clrt.o: $(srcdir)/config/sh/sh_optimize_sett_clrt.cc \
+ $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
+DEFAULT_ENDIAN = $(word 1,$(TM_ENDIAN_CONFIG))
+OTHER_ENDIAN = $(word 2,$(TM_ENDIAN_CONFIG))
+
+MULTILIB_OPTIONS= $(OTHER_ENDIAN) $(TM_MULTILIB_CONFIG)
+MULTILIB_DIRNAMES=
+
+# The separate entries for m2a-nofpu and m2a-single-only with
+# duplicate base libraries are there to make sure we don't ever use an
+# m4* multilib for m2a or vice-versa; they are not compatible. This
+# is why sh2a and sh2a-single need their own multilibs.
+MULTILIB_MATCHES = $(shell \
+ multilibs="$(MULTILIB_OPTIONS)" ; \
+ for abi in m1,m2,m3,m4-nofpu,m4-100-nofpu,m4-200-nofpu,m4-400,m4-500,m4-340,m4-300-nofpu,m4al,m4a-nofpu \
+ m1,m2,m2a-nofpu \
+ m2e,m3e,m4-single-only,m4-100-single-only,m4-200-single-only,m4-300-single-only,m4a-single-only \
+ m2a-single,m2a-single-only \
+ m4-single,m4-100-single,m4-200-single,m4-300-single,m4a-single \
+ m4,m4-100,m4-200,m4-300,m4a \
+ m5-32media,m5-compact,m5-32media \
+ m5-32media-nofpu,m5-compact-nofpu,m5-32media-nofpu; do \
+ subst= ; \
+ for lib in `echo $$abi|tr , ' '` ; do \
+ if test "`echo $$multilibs|sed s/$$lib//`" != "$$multilibs"; then \
+ subst=$$lib ; \
+ elif test x$$subst != x ; then \
+ echo $$subst=$$lib ; \
+ fi \
+ done \
+ done)
+
+# SH1 only supports big endian.
+MULTILIB_EXCEPTIONS = ml/m1 ml/m2a* $(TM_MULTILIB_EXCEPTIONS_CONFIG)
+
+MULTILIB_OSDIRNAMES = \
+ $(OTHER_ENDIAN)=!$(OTHER_ENDIAN) \
+ m1=!m1 $(OTHER_ENDIAN)/m1=!$(OTHER_ENDIAN)/m1 \
+ m2a=!m2a $(OTHER_ENDIAN)/m2a=!$(OTHER_ENDIAN)/m2a \
+ m2a-nofpu=!m2a-nofpu $(OTHER_ENDIAN)/m2a-nofpu=!$(OTHER_ENDIAN)/m2a-nofpu \
+ m2a-single-only=!m2a-single-only $(OTHER_ENDIAN)/m2a-single-only=!$(OTHER_ENDIAN)/m2a-single-only \
+ m2a-single=!m2a-single $(OTHER_ENDIAN)/m2a-single=!$(OTHER_ENDIAN)/m2a-single \
+ m2e=!m2e $(OTHER_ENDIAN)/m2e=!$(OTHER_ENDIAN)/m2e \
+ m2=!m2 $(OTHER_ENDIAN)/m2=!$(OTHER_ENDIAN)/m2 \
+ m3e=!m3e $(OTHER_ENDIAN)/m3e=!$(OTHER_ENDIAN)/m3e \
+ m3=!m3 $(OTHER_ENDIAN)/m3=!$(OTHER_ENDIAN)/m3 \
+ m4-nofpu=!m4-nofpu $(OTHER_ENDIAN)/m4-nofpu=!$(OTHER_ENDIAN)/m4-nofpu \
+ m4-single-only=!m4-single-only $(OTHER_ENDIAN)/m4-single-only=!$(OTHER_ENDIAN)/m4-single-only \
+ m4-single=!m4-single $(OTHER_ENDIAN)/m4-single=!$(OTHER_ENDIAN)/m4-single \
+ m4=!m4 $(OTHER_ENDIAN)/m4=!$(OTHER_ENDIAN)/m4 \
+ m4a-nofpu=!m4a-nofpu $(OTHER_ENDIAN)/m4a-nofpu=!$(OTHER_ENDIAN)/m4a-nofpu \
+ m4a-single-only=!m4a-single-only $(OTHER_ENDIAN)/m4a-single-only=!$(OTHER_ENDIAN)/m4a-single-only \
+ m4a-single=!m4a-single $(OTHER_ENDIAN)/m4a-single=!$(OTHER_ENDIAN)/m4a-single \
+ m4a=!m4a $(OTHER_ENDIAN)/m4a=!$(OTHER_ENDIAN)/m4a \
+ m4al=!m4al $(OTHER_ENDIAN)/m4al=!$(OTHER_ENDIAN)/m4al \
+ m5-32media=!m5-32media $(OTHER_ENDIAN)/m5-32media=!$(OTHER_ENDIAN)/m5-32media \
+ m5-32media-nofpu=!m5-32media-nofpu $(OTHER_ENDIAN)/m5-32media-nofpu=!$(OTHER_ENDIAN)/m5-32media-nofpu \
+ m5-compact=!m5-compact $(OTHER_ENDIAN)/m5-compact=!$(OTHER_ENDIAN)/m5-compact \
+ m5-compact-nofpu=!m5-compact-nofpu $(OTHER_ENDIAN)/m5-compact-nofpu=!$(OTHER_ENDIAN)/m5-compact-nofpu \
+ m5-64media=!m5-64media $(OTHER_ENDIAN)/m5-64media=!$(OTHER_ENDIAN)/m5-64media \
+ m5-64media-nofpu=!m5-64media-nofpu $(OTHER_ENDIAN)/m5-64media-nofpu=!$(OTHER_ENDIAN)/m5-64media-nofpu
+
+$(out_object_file): gt-sh.h
+gt-sh.h : s-gtype ; @true
+
+# Local Variables:
+# mode: Makefile
+# End:
diff --git a/gcc-4.9/gcc/config/sh/t-sh64 b/gcc-4.9/gcc/config/sh/t-sh64
new file mode 100644
index 000000000..3222099b8
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/t-sh64
@@ -0,0 +1,22 @@
+# Copyright (C) 2002-2014 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_CPU_DIRS= $(ML_sh1) $(ML_sh2e) $(ML_sh2) $(ML_sh3e) $(ML_sh3) $(ML_sh4_nofpu) $(ML_sh4_single_only) $(ML_sh4_single) $(ML_sh4) $(ML_sh5_32media:m5-32media/=media32) $(ML_sh5_32media_nofpu:m5-32media-nofpu/=nofpu/media32) $(ML_sh5_compact:m5-compact/=compact) $(ML_sh5_compact_nofpu:m5-compact-nofpu/=nofpu/compact) $(ML_sh5_64media:m5-64media/=media64) $(ML_sh5_64media_nofpu:m5-64media-nofpu/=nofpu/media64)
+
+MULTILIB_RAW_DIRNAMES= $(MULTILIB_ENDIAN:/mb= mb) $(MULTILIB_CPU_DIRS:/=)
+MULTILIB_DIRNAMES= $(MULTILIB_RAW_DIRNAMES)
diff --git a/gcc-4.9/gcc/config/sh/t-vxworks b/gcc-4.9/gcc/config/sh/t-vxworks
new file mode 100644
index 000000000..d7ccc9b7f
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/t-vxworks
@@ -0,0 +1,6 @@
+# Multilibs for VxWorks.
+
+MULTILIB_OPTIONS = mrtp fPIC m2/m3/m4/m4a ml
+# Don't build -fPIC without -mrtp, or -ml without -m3/-m4.
+MULTILIB_EXCEPTIONS = fPIC* ml* mrtp/ml* mrtp/fPIC/ml* *m2/ml*
+MULTILIB_MATCHES = m2=m4-nofpu fPIC=fpic
diff --git a/gcc-4.9/gcc/config/sh/ushmedia.h b/gcc-4.9/gcc/config/sh/ushmedia.h
new file mode 100644
index 000000000..03064e964
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/ushmedia.h
@@ -0,0 +1,1091 @@
+/* Copyright (C) 2000-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+/* ushmedia.h: Intrinsics corresponding to SHmedia instructions that
+ may be executed in both user and privileged mode. */
+
+#ifndef _USHMEDIA_H
+#define _USHMEDIA_H
+
+#if __SHMEDIA__
+#if ! __SH4_NOFPU__
+typedef float __GCC_FV __attribute__ ((vector_size (4 * sizeof (float))));
+typedef float __GCC_MTRX __attribute__ ((vector_size (16 * sizeof (float))));
+#endif
+
+static __inline unsigned long long
+sh_media_MABS_L (unsigned long long mm)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_absv2si2 ((v2si) mm);
+}
+
+static __inline unsigned long long
+sh_media_MABS_W (unsigned long long mm)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_absv4hi2 ((v4hi) mm);
+}
+
+static __inline unsigned long long
+sh_media_MADD_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_addv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADD_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_addv4hi3 ((v4hi) mm, (v4hi) mn);
+}
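A small worked example of the packed-halfword add above, assuming this header is in scope and the code is built for SHmedia: each 64-bit operand carries four independent 16-bit lanes, and the values are chosen so that no lane overflows (the MADDS variants below saturate instead of wrapping). The function name is illustrative only.

    static __inline unsigned long long
    add_lanes_example (void)
    {
      unsigned long long a = 0x0001000200030004ULL;   /* lanes 1, 2, 3, 4     */
      unsigned long long b = 0x0010002000300040ULL;   /* lanes 16, 32, 48, 64 */

      /* Lane-wise addition modulo 2^16: yields 0x0011002200330044.  */
      return sh_media_MADD_W (a, b);
    }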
+
+static __inline unsigned long long
+sh_media_MADDS_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_ssaddv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADDS_UB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_usaddv8qi3 ((v8qi) mm, (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MADDS_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_ssaddv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPEQ_B (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPEQ_B ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPEQ_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPEQ_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPEQ_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPEQ_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPGT_UB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPGT_UB ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPGT_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPGT_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCMPGT_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCMPGT_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+#define sh_media_MCMV __builtin_sh_media_MCMV
+
+static __inline unsigned long long
+sh_media_MCNVS_LW (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+ typedef unsigned int uv2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MCNVS_LW ((v2si) mm,
+ (uv2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCNVS_WB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCNVS_WB ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MCNVS_WUB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MCNVS_WUB ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR1 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR1 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR2 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR2 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR3 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR3 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR4 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR4 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR5 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR5 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR6 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR6 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MEXTR7 (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MEXTR7 ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMACFX_WL (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef float v2hi __attribute__ ((mode(V2HI)));
+ typedef float v2si __attribute__ ((mode(V2SI)));
+ typedef unsigned int uv2si __attribute__ ((mode(V2SI)));
+
+ long mm_l = (long) mm;
+ long mn_l = (long) mn;
+
+ return ((unsigned long long)
+ __builtin_sh_media_MMACFX_WL ((v2hi) mm_l, (v2hi) mn_l,
+ (uv2si) mw));
+}
+
+static __inline unsigned long long
+sh_media_MMACNFX_WL (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef float v2hi __attribute__ ((mode(V2HI)));
+ typedef float v2si __attribute__ ((mode(V2SI)));
+ typedef unsigned int uv2si __attribute__ ((mode(V2SI)));
+
+ long mm_l = (long) mm;
+ long mn_l = (long) mn;
+
+ return ((unsigned long long)
+ __builtin_sh_media_MMACNFX_WL ((v2hi) mm_l, (v2hi) mn_l,
+ (uv2si) mw));
+}
+
+static __inline unsigned long long
+sh_media_MMUL_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_mulv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMUL_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_mulv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULFX_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULFX_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULFX_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULFX_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULFXRP_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULFXRP_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULHI_WL (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULHI_WL ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULLO_WL (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MMULLO_WL ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MMULSUM_WQ (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef unsigned int uv4hi __attribute__ ((mode(V4HI)));
+
+ return __builtin_sh_media_MMULSUM_WQ ((uv4hi) mm, (uv4hi) mn, mw);
+}
+
+static __inline unsigned long long
+sh_media_MPERM_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MPERM_W ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSAD_UBQ (unsigned long long mm, unsigned long long mn,
+ unsigned long long mw)
+{
+ typedef unsigned int uv8qi __attribute__ ((mode(V8QI)));
+
+ return __builtin_sh_media_MSAD_UBQ ((uv8qi) mm, (uv8qi) mn, mw);
+}
+
+static __inline unsigned long long
+sh_media_MSHALDS_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHALDS_L ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHALDS_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHALDS_W ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHARD_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_ashrv2si3 ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHARD_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_ashrv4hi3 ((v4hi) mm, mn);
+}
+
+#define sh_media_MSHARDS_Q __builtin_sh_media_MSHARDS_Q
+
+static __inline unsigned long long
+sh_media_MSHFHI_B (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFHI_B ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFHI_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFHI_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFHI_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFHI_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFLO_B (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFLO_B ((v8qi) mm,
+ (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFLO_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFLO_L ((v2si) mm,
+ (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHFLO_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sh_media_MSHFLO_W ((v4hi) mm,
+ (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLLD_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_ashlv2si3 ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLLD_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_ashlv4hi3 ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLRD_L (unsigned long long mm, unsigned int mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_lshrv2si3 ((v2si) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSHLRD_W (unsigned long long mm, unsigned int mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_lshrv4hi3 ((v4hi) mm, mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUB_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_subv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUB_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_subv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUBS_L (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v2si __attribute__ ((mode(V2SI)));
+
+ return (unsigned long long) __builtin_sssubv2si3 ((v2si) mm, (v2si) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUBS_UB (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_ussubv8qi3 ((v8qi) mm, (v8qi) mn);
+}
+
+static __inline unsigned long long
+sh_media_MSUBS_W (unsigned long long mm, unsigned long long mn)
+{
+ typedef float v4hi __attribute__ ((mode(V4HI)));
+
+ return (unsigned long long) __builtin_sssubv4hi3 ((v4hi) mm, (v4hi) mn);
+}
+
+#if ! __SH4_NOFPU__
+/* Floating-point Intrinsics */
+
+#define sh_media_FABS_D __builtin_fabs
+#define sh_media_FABS_S __builtin_fabsf
+#define sh_media_FCMPUN_D __builtin_isunordered
+#define sh_media_FCMPUN_S __builtin_isunordered
+
+static __inline float sh_media_FCOSA_S (float fg)
+{
+ union { int i; float f; } u;
+
+ u.f = fg;
+ return __builtin_sh_media_FCOSA_S (u.i);
+}
+
+static __inline float
+sh_media_FGETSCR (void)
+{
+ float f;
+
+ __asm volatile ("fgetscr %0" : "=f" (f));
+ return f;
+}
+
+static __inline float
+sh_media_FIPR_S (const void *fvg, const void *fvh)
+{
+ typedef float v4sf __attribute__ ((mode(V4SF)));
+ v4sf vg = *(v4sf*) fvg;
+ v4sf vh = *(v4sf*) fvh;
+
+ return __builtin_sh_media_FIPR_S (vg, vh);
+}
+
+#if 0
+/* This gives different results for -O0 */
+static __inline float
+sh_media_FMAC_S (float fg, float fh, float fq)
+{
+ return fg * fh + fq;
+}
+#else
+
+#define sh_media_FMAC_S __builtin_sh_media_FMAC_S
+#endif
+
+static __inline long long
+sh_media_FMOV_DQ (double dg)
+{
+ union { long long l; double d; } u;
+
+ u.d = dg;
+ return u.l;
+}
+
+static __inline float
+sh_media_FMOV_LS (int mm)
+{
+ union { int i; float f; } u;
+
+ u.i = mm;
+ return u.f;
+}
+
+static __inline double
+sh_media_FMOV_QD (long long mm)
+{
+ union { long long l; double d; } u;
+
+ u.l = mm;
+ return u.d;
+}
+
+static __inline int
+sh_media_FMOV_SL (float fg)
+{
+ union { int i; float f; } u;
+
+ u.f = fg;
+ return u.i;
+}
+
+static __inline void
+sh_media_FPUTSCR (float fg)
+{
+ __asm volatile ("fputscr %0" : : "f" (fg));
+}
+
+static __inline float sh_media_FSINA_S (float fg)
+{
+ union { int i; float f; } u;
+
+ u.f = fg;
+ return __builtin_sh_media_FSINA_S (u.i);
+}
+
+/* Can't use __builtin_sqrt / __builtin_sqrtf because they still implement
+ error handling unless -ffast-math is used. */
+#define sh_media_FSQRT_D __builtin_sh_media_FSQRT_D
+#define sh_media_FSQRT_S __builtin_sh_media_FSQRT_S
+#define sh_media_FSRRA_S __builtin_sh_media_FSRRA_S
+
+static __inline void
+sh_media_FTRV_S (const void *mtrxg, const void *fvh, void *fvf)
+{
+ typedef float v16sf __attribute__ ((mode(V16SF)));
+ typedef float v4sf __attribute__ ((mode(V4SF)));
+ v16sf mtrx = *(v16sf*) mtrxg;
+ v4sf vh = *(v4sf*) fvh;
+
+ *(v4sf*) fvf = __builtin_sh_media_FTRV_S (mtrx, vh);
+}
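A short sketch of driving the transform intrinsic above from ordinary arrays, assuming this header is in scope, the target has an FPU (! __SH4_NOFPU__), and the arrays are suitably aligned for the vector loads; the 16 matrix elements are assumed to be stored in whatever order the ftrv unit expects, which this sketch does not fix. Names are illustrative only.

    static __inline void
    transform_vec4 (const float mtrx[16], const float in[4], float out[4])
    {
      /* One 4-element transform result per call: out = mtrx applied to in.  */
      sh_media_FTRV_S (mtrx, in, out);
    }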
+#endif /* ! __SH4_NOFPU__ */
+
+/* Not implemented here: Control and Configuration intrinsics. */
+/* Misaligned Access Support intrinsics */
+
+static __inline unsigned long long
+sh_media_LDHI_L (void *p, int s)
+{
+ return __builtin_sh_media_LDHI_L ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDHI_Q (void *p, int s)
+{
+ return __builtin_sh_media_LDHI_Q ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDLO_L (void *p, int s)
+{
+ return __builtin_sh_media_LDLO_L ((char *)p + s);
+}
+
+static __inline unsigned long long
+sh_media_LDLO_Q (void *p, int s)
+{
+ return __builtin_sh_media_LDLO_Q ((char *)p + s);
+}
+
+static __inline void
+sh_media_STHI_L (void *p, int s, unsigned int mw)
+{
+ __builtin_sh_media_STHI_L ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STHI_Q (void *p, int s, unsigned long long mw)
+{
+ __builtin_sh_media_STHI_Q ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STLO_L (void *p, int s, unsigned int mw)
+{
+ __builtin_sh_media_STLO_L ((char*)p + s, mw);
+}
+
+static __inline void
+sh_media_STLO_Q (void *p, int s, unsigned long long mw)
+{
+ __builtin_sh_media_STLO_Q ((char*)p + s, mw);
+}
+
+/* Miscellaneous intrinsics */
+
+#define sh_media_NSB __builtin_sh_media_NSB
+
+static __inline unsigned long long
+sh_media_BYTEREV (unsigned long long mm)
+{
+ typedef float v8qi __attribute__ ((mode(V8QI)));
+
+ return (unsigned long long) __builtin_sh_media_BYTEREV ((v8qi) mm);
+}
+
+__inline__ static unsigned long long
+sh_media_CMVEQ (unsigned long long mm, unsigned long long mn, unsigned long long mw) __attribute__ ((always_inline));
+
+__inline__ static unsigned long long
+sh_media_CMVEQ (unsigned long long mm, unsigned long long mn, unsigned long long mw)
+{
+ return mm == 0 ? mn : mw;
+}
+
+__inline__ static unsigned long long
+sh_media_CMVNE (unsigned long long mm, unsigned long long mn, unsigned long long mw) __attribute__ ((always_inline));
+
+__inline__ static unsigned long long
+sh_media_CMVNE (unsigned long long mm, unsigned long long mn, unsigned long long mw)
+{
+ return mm != 0 ? mn : mw;
+}
+
+static __inline long long
+sh_media_ADDZ_L (unsigned int mm, unsigned int mn)
+{
+ return mm + mn;
+}
+
+/* NOP and Synchronization intrinsics not implemented here. */
+
+static __inline__ void sh_media_PREFO(void *mm, int s)
+{
+ __builtin_sh_media_PREFO (mm + s, 0, 0);
+}
+
+/* Event Handling intrinsics not implemented here. */
+
+/* Old asm stuff */
+
+static __inline__
+void
+sh_media_NOP (void)
+{
+ __asm__ ("nop" : :);
+}
+
+__inline__ static
+unsigned long long
+sh_media_SWAP_Q (void *mm, long long mn, unsigned long long mw)
+{
+ unsigned long long res;
+ unsigned long long *addr = (unsigned long long *)((char *)mm + mn);
+ __asm__ ("swap.q %m1, %0" : "=r" (res), "+o" (*addr) : "0" (mw));
+ return res;
+}
+
+__inline__ static
+void
+sh_media_SYNCI (void)
+{
+ __asm__ __volatile__ ("synci");
+}
+
+__inline__ static
+void
+sh_media_SYNCO (void)
+{
+ __asm__ __volatile__ ("synco");
+}
+
+__inline__ static
+void
+sh_media_ALLOCO (void *mm, int s)
+{
+ __builtin_sh_media_ALLOCO (mm + s);
+}
+
+__inline__ static
+void
+sh_media_ICBI (void *mm, int s)
+{
+ __asm__ __volatile__ ("icbi %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_OCBI (void *mm, int s)
+{
+ __asm__ __volatile__ ("ocbi %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_OCBP (void *mm, int s)
+{
+ __asm__ __volatile__ ("ocbp %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_OCBWB (void *mm, int s)
+{
+ __asm__ __volatile__ ("ocbwb %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_PREFI (void *mm, int s)
+{
+ __asm__ __volatile__ ("prefi %m0" : : "o" (((char*)mm)[s]));
+}
+
+__inline__ static
+void
+sh_media_BRK (void)
+{
+ __asm__ __volatile__ ("brk");
+}
+
+__inline__ static
+void
+sh_media_TRAPA (unsigned long long mm)
+{
+ __asm__ __volatile__ ("trapa %%0" : : "r" (mm));
+}
+
+__inline__ static
+short
+sh_media_unaligned_LD_W (void *p)
+{
+#if __LITTLE_ENDIAN__
+ return (((unsigned char *)p)[0]
+ | (((short)((__signed__ char *)p)[1]) << 8));
+#else
+ return ((((short)((__signed__ char *)p)[0]) << 8)
+ | ((unsigned char *)p)[1]);
+#endif
+}
+
+__inline__ static
+unsigned short
+sh_media_unaligned_LD_UW (void *p)
+{
+ unsigned char *addr = p;
+#if __LITTLE_ENDIAN__
+ return sh_media_MSHFLO_B (addr[0], addr[1]);
+#else
+ return sh_media_MSHFLO_B (addr[1], addr[0]);
+#endif
+}
+
+/* We don't use the sh_media_LD* functions here because that turned out
+ to impede constant propagation of the offsets into the ldhi / ldlo
+ instructions. */
+__inline__ static
+int
+sh_media_unaligned_LD_L (void *p)
+{
+#if __LITTLE_ENDIAN__
+ return (__builtin_sh_media_LDHI_L ((char *)p + 3)
+ | __builtin_sh_media_LDLO_L (p));
+#else
+ return (__builtin_sh_media_LDLO_L ((char *)p + 3)
+ | __builtin_sh_media_LDHI_L (p));
+#endif
+}
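A minimal sketch of using the helper above to read a 32-bit value from an arbitrarily aligned position in a byte buffer; the helper already accounts for endianness, so the caller only supplies a base pointer and a byte offset. The function name is illustrative only.

    static __inline int
    load_s32_unaligned (const unsigned char *buf, int offset)
    {
      /* The cast drops const only for the call; the helper does not
         write through the pointer.  */
      return sh_media_unaligned_LD_L ((void *) (buf + offset));
    }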
+
+__inline__ static
+long long
+sh_media_unaligned_LD_Q (void *p)
+{
+#if __LITTLE_ENDIAN__
+ return (__builtin_sh_media_LDHI_Q ((char *)p + 7)
+ | __builtin_sh_media_LDLO_Q (p));
+#else
+ return (__builtin_sh_media_LDLO_Q ((char *)p + 7)
+ | __builtin_sh_media_LDHI_Q (p));
+#endif
+}
+
+__inline__ static
+void
+sh_media_unaligned_ST_W (void *p, unsigned int k)
+{
+ char *addr = p;
+#if __LITTLE_ENDIAN__
+ addr[0] = k;
+ addr[1] = k >> 8;
+#else
+ addr[1] = k;
+ addr[0] = k >> 8;
+#endif
+}
+
+/* We don't use the sh_media_ST* functions here because that turned out
+ to impede constant propagation of the offsets into the sthi / stlo
+ instructions. */
+__inline__ static
+void
+sh_media_unaligned_ST_L (void *p, unsigned int k)
+{
+#if __LITTLE_ENDIAN__
+ __builtin_sh_media_STHI_L (p + 3, k);
+ __builtin_sh_media_STLO_L (p, k);
+#else
+ __builtin_sh_media_STLO_L (p + 3, k);
+ __builtin_sh_media_STHI_L (p, k);
+#endif
+}
+
+__inline__ static
+void
+sh_media_unaligned_ST_Q (void *p, unsigned long long k)
+{
+#if __LITTLE_ENDIAN__
+ __builtin_sh_media_STHI_Q (p + 7, k);
+ __builtin_sh_media_STLO_Q (p, k);
+#else
+ __builtin_sh_media_STLO_Q (p + 7, k);
+ __builtin_sh_media_STHI_Q (p, k);
+#endif
+}
+
+#if ! __SH4_NOFPU__
+__inline__ static
+void
+sh_media_FVCOPY_S (const void *fvg, void *fvf)
+{
+ const __GCC_FV *g = fvg;
+ __GCC_FV *f = fvf;
+ *f = *g;
+}
+
+__inline__ static
+void
+sh_media_FVADD_S (const void *fvg, const void *fvh, void *fvf)
+{
+ const float *g = fvg, *h = fvh;
+ float *f = fvf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = g[i] + h[i];
+#else
+ f[0] = g[0] + h[0];
+ f[1] = g[1] + h[1];
+ f[2] = g[2] + h[2];
+ f[3] = g[3] + h[3];
+#endif
+}
+
+__inline__ static
+void
+sh_media_FVSUB_S (const void *fvg, const void *fvh, void *fvf)
+{
+ const float *g = fvg, *h = fvh;
+ float *f = fvf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = g[i] - h[i];
+#else
+ f[0] = g[0] - h[0];
+ f[1] = g[1] - h[1];
+ f[2] = g[2] - h[2];
+ f[3] = g[3] - h[3];
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXCOPY_S (const void *mtrxg, void *mtrxf)
+{
+ const __GCC_MTRX *g = mtrxg;
+ __GCC_MTRX *f = mtrxf;
+ *f = *g;
+}
+
+__inline__ static
+void
+sh_media_FMTRXADD_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *h = mtrxh;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ sh_media_FVADD_S (&g[i], &h[i], &f[i]);
+#else
+ sh_media_FVADD_S (&g[0], &h[0], &f[0]);
+ sh_media_FVADD_S (&g[1], &h[1], &f[1]);
+ sh_media_FVADD_S (&g[2], &h[2], &f[2]);
+ sh_media_FVADD_S (&g[3], &h[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXSUB_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *h = mtrxh;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int i;
+
+ for (i = 0; i < 4; i++)
+ sh_media_FVSUB_S (&g[i], &h[i], &f[i]);
+#else
+ sh_media_FVSUB_S (&g[0], &h[0], &f[0]);
+ sh_media_FVSUB_S (&g[1], &h[1], &f[1]);
+ sh_media_FVSUB_S (&g[2], &h[2], &f[2]);
+ sh_media_FVSUB_S (&g[3], &h[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FTRVADD_S (const void *mtrxg, const void *fvh, const void *fvi,
+ void *fvf)
+{
+ sh_media_FTRV_S (mtrxg, fvh, fvf);
+ sh_media_FVADD_S (fvf, fvi, fvf);
+}
+
+__inline__ static
+void
+sh_media_FTRVSUB_S (const void *mtrxg, const void *fvh, const void *fvi,
+ void *fvf)
+{
+ sh_media_FTRV_S (mtrxg, fvh, fvf);
+ sh_media_FVSUB_S (fvf, fvi, fvf);
+}
+
+__inline__ static
+void
+sh_media_FMTRXMUL_S (const void *mtrxg, const void *mtrxh, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int j;
+
+ for (j = 0; j < 4; j++)
+ sh_media_FTRV_S (mtrxh, &g[j], &f[j]);
+#else
+ sh_media_FTRV_S (mtrxh, &g[0], &f[0]);
+ sh_media_FTRV_S (mtrxh, &g[1], &f[1]);
+ sh_media_FTRV_S (mtrxh, &g[2], &f[2]);
+ sh_media_FTRV_S (mtrxh, &g[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXMULADD_S (const void *mtrxg, const void *mtrxh,
+ const void *mtrxi, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *i = mtrxi;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int j;
+
+ for (j = 0; j < 4; j++)
+ sh_media_FTRVADD_S (mtrxh, &g[j], &i[j], &f[j]);
+#else
+ sh_media_FTRVADD_S (mtrxh, &g[0], &i[0], &f[0]);
+ sh_media_FTRVADD_S (mtrxh, &g[1], &i[1], &f[1]);
+ sh_media_FTRVADD_S (mtrxh, &g[2], &i[2], &f[2]);
+ sh_media_FTRVADD_S (mtrxh, &g[3], &i[3], &f[3]);
+#endif
+}
+
+__inline__ static
+void
+sh_media_FMTRXMULSUB_S (const void *mtrxg, const void *mtrxh,
+ const void *mtrxi, void *mtrxf)
+{
+ const __GCC_FV *g = mtrxg, *i = mtrxi;
+ __GCC_FV *f = mtrxf;
+#if 1
+ int j;
+
+ for (j = 0; j < 4; j++)
+ sh_media_FTRVSUB_S (mtrxh, &g[j], &i[j], &f[j]);
+#else
+ sh_media_FTRVSUB_S (mtrxh, &g[0], &i[0], &f[0]);
+ sh_media_FTRVSUB_S (mtrxh, &g[1], &i[1], &f[1]);
+ sh_media_FTRVSUB_S (mtrxh, &g[2], &i[2], &f[2]);
+ sh_media_FTRVSUB_S (mtrxh, &g[3], &i[3], &f[3]);
+#endif
+}
+#endif /* ! __SH4_NOFPU__ */
+
+#endif /* __SHMEDIA__ */
+
+#endif /* _USHMEDIA_H */
diff --git a/gcc-4.9/gcc/config/sh/vxworks.h b/gcc-4.9/gcc/config/sh/vxworks.h
new file mode 100644
index 000000000..15dae73c7
--- /dev/null
+++ b/gcc-4.9/gcc/config/sh/vxworks.h
@@ -0,0 +1,66 @@
+/* Definitions of target machine for GCC,
+ for SuperH targeting the VxWorks run time environment.
+ Copyright (C) 2003-2014 Free Software Foundation, Inc.
+ Contributed by CodeSourcery, LLC.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_define ("CPU=SH7000"); \
+ VXWORKS_OS_CPP_BUILTINS (); \
+ } \
+ while (0)
+
+#undef SUBTARGET_OVERRIDE_OPTIONS
+#define SUBTARGET_OVERRIDE_OPTIONS \
+ do \
+ { \
+ VXWORKS_OVERRIDE_OPTIONS; \
+ /* The kernel loader cannot handle the relaxation \
+ relocations, so it cannot load kernel modules \
+ (which are ET_REL) or RTP executables (which are \
+ linked with --emit-relocs). No relaxation relocations \
+ appear in shared libraries, so relaxation is OK \
+ for RTP PIC. */ \
+ if (TARGET_RELAX && !(TARGET_VXWORKS_RTP && flag_pic)) \
+ error ("-mrelax is only supported for RTP PIC"); \
+ } \
+ while (0)
+
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC VXWORKS_ADDITIONAL_CPP_SPEC
+
+#undef SUBTARGET_LINK_EMUL_SUFFIX
+#define SUBTARGET_LINK_EMUL_SUFFIX "_vxworks"
+
+#undef LIB_SPEC
+#define LIB_SPEC VXWORKS_LIB_SPEC
+#undef LINK_SPEC
+#define LINK_SPEC VXWORKS_LINK_SPEC " " SH_LINK_SPEC
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC
+
+/* There is no default multilib. */
+#undef MULTILIB_DEFAULTS
+
+#undef FUNCTION_PROFILER
+#define FUNCTION_PROFILER VXWORKS_FUNCTION_PROFILER